Move test of custom sig heuristic to plugin (#50891) (#51067)

This moves the testing of custom significance heuristic plugins from an
`ESIntegTestCase` to an example plugin. This is *much* more "real" and
can serve as an example for anyone who actually needs to build such a
plugin. The old test jumbled the testing concerns and the example code
together.
Nik Everett 2020-01-16 14:49:12 -05:00 committed by GitHub
parent 2353fe47fc
commit f6c89b4599
8 changed files with 388 additions and 148 deletions
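For context, a minimal sketch of how a consumer would wire the plugin's heuristic into a search with the Java `AggregationBuilders` API. It mirrors the aggregation built by the integration-test code removed at the bottom of this diff; the class name of the sketch is invented, and the index/field names ("test", "class", "text") are just the fixtures used by the YAML test added below.

import org.elasticsearch.action.search.SearchRequestBuilder;
import org.elasticsearch.client.Client;
import org.elasticsearch.example.customsigheuristic.SimpleHeuristic;

import static org.elasticsearch.search.aggregations.AggregationBuilders.significantTerms;
import static org.elasticsearch.search.aggregations.AggregationBuilders.terms;

/** Illustrative sketch only; not part of this commit. */
class CustomHeuristicUsageSketch {
    static SearchRequestBuilder significantTermsWithSimpleHeuristic(Client client) {
        return client.prepareSearch("test")
            .addAggregation(
                terms("class").field("class")
                    .subAggregation(
                        significantTerms("sig_terms")
                            .field("text")
                            .significanceHeuristic(new SimpleHeuristic()) // the heuristic registered by this plugin
                            .minDocCount(1)));
    }
}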

@@ -0,0 +1,28 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
apply plugin: 'elasticsearch.testclusters'
apply plugin: 'elasticsearch.esplugin'
esplugin {
    name 'custom-significance-heuristic'
    description 'An example plugin showing how to write and register a custom significance heuristic'
    classname 'org.elasticsearch.example.customsigheuristic.CustomSignificanceHeuristicPlugin'
    licenseFile rootProject.file('licenses/APACHE-LICENSE-2.0.txt')
    noticeFile rootProject.file('NOTICE.txt')
}

@@ -0,0 +1,38 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.example.customsigheuristic;

import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.plugins.SearchPlugin;
import org.elasticsearch.search.aggregations.bucket.significant.heuristics.SignificanceHeuristic;

import java.util.List;

import static java.util.Collections.singletonList;

/**
 * Plugin declaring a custom {@link SignificanceHeuristic}.
 */
public class CustomSignificanceHeuristicPlugin extends Plugin implements SearchPlugin {
    @Override
    public List<SignificanceHeuristicSpec<?>> getSignificanceHeuristics() {
        return singletonList(new SignificanceHeuristicSpec<>(SimpleHeuristic.NAME, SimpleHeuristic::new, SimpleHeuristic.PARSER));
    }
}
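A plugin that needed to ship more than one heuristic would simply return additional specs from the same method. A hypothetical sketch follows — `AnotherHeuristic` is invented purely for illustration (it is not part of this commit), and `java.util.Arrays` would need to be imported:

    // Hypothetical: "AnotherHeuristic" does not exist in this commit.
    @Override
    public List<SignificanceHeuristicSpec<?>> getSignificanceHeuristics() {
        return Arrays.asList(
            new SignificanceHeuristicSpec<>(SimpleHeuristic.NAME, SimpleHeuristic::new, SimpleHeuristic.PARSER),
            new SignificanceHeuristicSpec<>(AnotherHeuristic.NAME, AnotherHeuristic::new, AnotherHeuristic.PARSER));
    }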

@@ -0,0 +1,90 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.example.customsigheuristic;

import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.xcontent.ObjectParser;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.search.aggregations.bucket.significant.heuristics.SignificanceHeuristic;

import java.io.IOException;

/**
 * A simple {@linkplain SignificanceHeuristic} used as an example of declaring a custom heuristic.
 */
public class SimpleHeuristic extends SignificanceHeuristic {
    public static final String NAME = "simple";
    public static final ObjectParser<SimpleHeuristic, Void> PARSER = new ObjectParser<>(NAME, SimpleHeuristic::new);

    public SimpleHeuristic() {
    }

    /**
     * Read from a stream.
     */
    public SimpleHeuristic(StreamInput in) throws IOException {
        // Nothing to read
    }

    @Override
    public void writeTo(StreamOutput out) throws IOException {
        // Nothing to write
    }

    @Override
    public String getWriteableName() {
        return NAME;
    }

    @Override
    public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
        builder.startObject(NAME).endObject();
        return builder;
    }

    @Override
    public int hashCode() {
        return 1;
    }

    @Override
    public boolean equals(Object obj) {
        if (obj == null) {
            return false;
        }
        if (getClass() != obj.getClass()) {
            return false;
        }
        return true;
    }

    /**
     * @param subsetFreq   The frequency of the term in the selected sample
     * @param subsetSize   The size of the selected sample (typically number of docs)
     * @param supersetFreq The frequency of the term in the superset from which the sample was taken
     * @param supersetSize The size of the superset from which the sample was taken (typically number of docs)
     * @return a "significance" score
     */
    @Override
    public double getScore(long subsetFreq, long subsetSize, long supersetFreq, long supersetSize) {
        return subsetFreq / subsetSize > supersetFreq / supersetSize ? 2.0 : 1.0;
    }
}
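Note that the division in getScore() happens on long values and therefore truncates. A small sketch (not part of the commit) showing how that produces the 2.0 / 1.0 scores asserted by the YAML tests added below, using the class-0 bucket of the fixture data: 4 docs in the bucket, 7 docs total, "bar" in all 4 bucket docs and 5 docs overall, "foo" in 1 bucket doc and 4 docs overall.

// Not part of the commit; assumes the org.elasticsearch.example.customsigheuristic package.
class SimpleHeuristicScoreSketch {
    public static void main(String[] args) {
        SimpleHeuristic heuristic = new SimpleHeuristic();
        System.out.println(heuristic.getScore(4, 4, 5, 7)); // "bar" in class 0: 4/4 = 1 > 5/7 = 0  -> 2.0
        System.out.println(heuristic.getScore(1, 4, 4, 7)); // "foo" in class 0: 1/4 = 0, not > 4/7 = 0 -> 1.0
    }
}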

@@ -0,0 +1,36 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.example.customsigheuristic;

import com.carrotsearch.randomizedtesting.annotations.Name;
import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
import org.elasticsearch.test.rest.yaml.ClientYamlTestCandidate;
import org.elasticsearch.test.rest.yaml.ESClientYamlSuiteTestCase;

public class CustomSignificanceHeuristicClientYamlTestSuiteIT extends ESClientYamlSuiteTestCase {
    public CustomSignificanceHeuristicClientYamlTestSuiteIT(@Name("yaml") ClientYamlTestCandidate testCandidate) {
        super(testCandidate);
    }

    @ParametersFactory
    public static Iterable<Object[]> parameters() throws Exception {
        return ESClientYamlSuiteTestCase.createParameters();
    }
}

@@ -0,0 +1,57 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.example.customsigheuristic;

import org.elasticsearch.common.io.stream.Writeable.Reader;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.common.xcontent.XContentParser.Token;
import org.elasticsearch.test.AbstractSerializingTestCase;

import java.io.IOException;

import static org.hamcrest.Matchers.equalTo;

public class SimpleHeuristicWireTests extends AbstractSerializingTestCase<SimpleHeuristic> {
    @Override
    protected SimpleHeuristic doParseInstance(XContentParser parser) throws IOException {
        /* Because Heuristics are XContent "fragments" we need to throw away
         * the "extra" stuff before calling the parser. */
        parser.nextToken();
        assertThat(parser.currentToken(), equalTo(Token.START_OBJECT));
        parser.nextToken();
        assertThat(parser.currentToken(), equalTo(Token.FIELD_NAME));
        assertThat(parser.currentName(), equalTo("simple"));
        parser.nextToken();
        SimpleHeuristic h = SimpleHeuristic.PARSER.apply(parser, null);
        assertThat(parser.currentToken(), equalTo(Token.END_OBJECT));
        parser.nextToken();
        return h;
    }

    @Override
    protected Reader<SimpleHeuristic> instanceReader() {
        return SimpleHeuristic::new;
    }

    @Override
    protected SimpleHeuristic createTestInstance() {
        return new SimpleHeuristic();
    }
}
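For reference, the XContent "fragment" that doParseInstance() has to unwrap is simply the named empty object emitted by SimpleHeuristic#toXContent. A short illustration (not part of the commit):

// Not part of the commit. Serialized form produced by SimpleHeuristic#toXContent:
String serialized = "{\"simple\":{}}";
// doParseInstance() above walks START_OBJECT -> FIELD_NAME "simple" -> hands the inner
// empty object to SimpleHeuristic.PARSER -> checks END_OBJECT, matching that shape.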

@@ -0,0 +1,16 @@
# tests that the custom significance heuristic plugin is installed
---
"plugin loaded":
  - skip:
      reason: "contains is a newly added assertion"
      features: contains

  # Get master node id
  - do:
      cluster.state: {}

  - set: { master_node: master }

  - do:
      nodes.info: {}

  - contains: { nodes.$master.plugins: { name: custom-significance-heuristic } }

@@ -0,0 +1,121 @@
setup:
  - do:
      indices.create:
        index: test
        body:
          mappings:
            properties:
              text:
                type: text
                fielddata: true
              long:
                type: long

  - do:
      bulk:
        refresh: true
        index: test
        body:
          - '{"index": {"_id": "1"}}'
          - '{"text": "foo", "long": 1, "class": 1}'
          - '{"index": {"_id": "2"}}'
          - '{"text": "foo", "long": 1, "class": 1}'
          - '{"index": {"_id": "3"}}'
          - '{"text": "bar", "long": 0, "class": 0}'
          - '{"index": {"_id": "4"}}'
          - '{"text": "bar", "long": 0, "class": 0}'
          - '{"index": {"_id": "5"}}'
          - '{"text": ["foo", "bar"], "long": [1, 0], "class": 1}'
          - '{"index": {"_id": "6"}}'
          - '{"text": ["foo", "bar"], "long": [1, 0], "class": 0}'
          - '{"index": {"_id": "7"}}'
          - '{"text": "bar", "long": 0, "class": 0}'

---
"test custom heuristic on significant_text":
  - do:
      search:
        index: test
        size: 0
        body:
          aggs:
            class:
              terms:
                field: class
                order: { _key: asc }
              aggs:
                sig:
                  significant_text:
                    field: text
                    simple: {}
                    min_doc_count: 1

  - match: { aggregations.class.buckets.0.key: 0 }
  - match: { aggregations.class.buckets.0.sig.buckets.0.key: bar }
  - match: { aggregations.class.buckets.0.sig.buckets.0.score: 2.0 }
  - match: { aggregations.class.buckets.0.sig.buckets.1.key: foo }
  - match: { aggregations.class.buckets.0.sig.buckets.1.score: 1.0 }
  - match: { aggregations.class.buckets.1.key: 1 }
  - match: { aggregations.class.buckets.1.sig.buckets.0.key: foo }
  - match: { aggregations.class.buckets.1.sig.buckets.0.score: 2.0 }
  - match: { aggregations.class.buckets.1.sig.buckets.1.key: bar }
  - match: { aggregations.class.buckets.1.sig.buckets.1.score: 1.0 }

---
"test custom heuristic on text significant_terms":
  - do:
      search:
        index: test
        size: 0
        body:
          aggs:
            class:
              terms:
                field: class
                order: { _key: asc }
              aggs:
                sig:
                  significant_terms:
                    field: text
                    simple: {}
                    min_doc_count: 1

  - match: { aggregations.class.buckets.0.key: 0 }
  - match: { aggregations.class.buckets.0.sig.buckets.0.key: bar }
  - match: { aggregations.class.buckets.0.sig.buckets.0.score: 2.0 }
  - match: { aggregations.class.buckets.0.sig.buckets.1.key: foo }
  - match: { aggregations.class.buckets.0.sig.buckets.1.score: 1.0 }
  - match: { aggregations.class.buckets.1.key: 1 }
  - match: { aggregations.class.buckets.1.sig.buckets.0.key: foo }
  - match: { aggregations.class.buckets.1.sig.buckets.0.score: 2.0 }
  - match: { aggregations.class.buckets.1.sig.buckets.1.key: bar }
  - match: { aggregations.class.buckets.1.sig.buckets.1.score: 1.0 }

---
"test custom heuristic on long significant_terms":
  - do:
      search:
        index: test
        size: 0
        body:
          aggs:
            class:
              terms:
                field: class
                order: { _key: asc }
              aggs:
                sig:
                  significant_terms:
                    field: long
                    simple: {}
                    min_doc_count: 1

  - match: { aggregations.class.buckets.0.key: 0 }
  - match: { aggregations.class.buckets.0.sig.buckets.0.key: 0 }
  - match: { aggregations.class.buckets.0.sig.buckets.0.score: 2.0 }
  - match: { aggregations.class.buckets.0.sig.buckets.1.key: 1 }
  - match: { aggregations.class.buckets.0.sig.buckets.1.score: 1.0 }
  - match: { aggregations.class.buckets.1.key: 1 }
  - match: { aggregations.class.buckets.1.sig.buckets.0.key: 1 }
  - match: { aggregations.class.buckets.1.sig.buckets.0.score: 2.0 }
  - match: { aggregations.class.buckets.1.sig.buckets.1.key: 0 }
  - match: { aggregations.class.buckets.1.sig.buckets.1.score: 1.0 }

@@ -22,10 +22,7 @@ import org.elasticsearch.action.index.IndexRequestBuilder;
 import org.elasticsearch.action.search.SearchRequestBuilder;
 import org.elasticsearch.action.search.SearchResponse;
 import org.elasticsearch.common.Strings;
-import org.elasticsearch.common.io.stream.StreamInput;
-import org.elasticsearch.common.io.stream.StreamOutput;
 import org.elasticsearch.common.settings.Settings;
-import org.elasticsearch.common.xcontent.ObjectParser;
 import org.elasticsearch.common.xcontent.ToXContent;
 import org.elasticsearch.common.xcontent.XContentBuilder;
 import org.elasticsearch.common.xcontent.XContentFactory;
@@ -65,7 +62,6 @@ import java.util.Map;
 import java.util.concurrent.ExecutionException;
 import java.util.function.Function;
-import static java.util.Collections.singletonList;
 import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_REPLICAS;
 import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_SHARDS;
 import static org.elasticsearch.search.aggregations.AggregationBuilders.filter;
@@ -89,97 +85,14 @@ public class SignificantTermsSignificanceScoreIT extends ESIntegTestCase {
     @Override
     protected Collection<Class<? extends Plugin>> nodePlugins() {
-        return Arrays.asList(CustomSignificanceHeuristicPlugin.class);
-    }
-
-    @Override
-    protected Collection<Class<? extends Plugin>> transportClientPlugins() {
-        return Arrays.asList(CustomSignificanceHeuristicPlugin.class);
+        return Arrays.asList(TestScriptPlugin.class);
     }

     public String randomExecutionHint() {
         return randomBoolean() ? null : randomFrom(SignificantTermsAggregatorFactory.ExecutionMode.values()).toString();
     }

-    public void testPlugin() throws Exception {
-        String type = randomBoolean() ? "text" : "long";
-        String settings = "{\"index.number_of_shards\": 1, \"index.number_of_replicas\": 0}";
-        SharedSignificantTermsTestMethods.index01Docs(type, settings, this);
-
-        SearchRequestBuilder request;
-        if ("text".equals(type) && randomBoolean()) {
-            // Use significant_text on text fields but occasionally run with alternative of
-            // significant_terms on legacy fieldData=true too.
-            request = client().prepareSearch(INDEX_NAME).setTypes(DOC_TYPE)
-                    .addAggregation(
-                            terms("class")
-                                    .field(CLASS_FIELD)
-                                    .subAggregation((significantText("sig_terms", TEXT_FIELD))
-                                            .significanceHeuristic(new SimpleHeuristic())
-                                            .minDocCount(1)
-                                    )
-                    );
-        }else
-        {
-            request = client().prepareSearch(INDEX_NAME).setTypes(DOC_TYPE)
-                    .addAggregation(
-                            terms("class")
-                                    .field(CLASS_FIELD)
-                                    .subAggregation((significantTerms("sig_terms"))
-                                            .field(TEXT_FIELD)
-                                            .significanceHeuristic(new SimpleHeuristic())
-                                            .minDocCount(1)
-                                    )
-                    );
-        }
-
-        SearchResponse response = request.get();
-        assertSearchResponse(response);
-
-        StringTerms classes = response.getAggregations().get("class");
-        assertThat(classes.getBuckets().size(), equalTo(2));
-        for (Terms.Bucket classBucket : classes.getBuckets()) {
-            Map<String, Aggregation> aggs = classBucket.getAggregations().asMap();
-            assertTrue(aggs.containsKey("sig_terms"));
-            SignificantTerms agg = (SignificantTerms) aggs.get("sig_terms");
-            assertThat(agg.getBuckets().size(), equalTo(2));
-            Iterator<SignificantTerms.Bucket> bucketIterator = agg.iterator();
-            SignificantTerms.Bucket sigBucket = bucketIterator.next();
-            String term = sigBucket.getKeyAsString();
-            String classTerm = classBucket.getKeyAsString();
-            assertTrue(term.equals(classTerm));
-            assertThat(sigBucket.getSignificanceScore(), closeTo(2.0, 1.e-8));
-            sigBucket = bucketIterator.next();
-            assertThat(sigBucket.getSignificanceScore(), closeTo(1.0, 1.e-8));
-        }
-
-        // we run the same test again but this time we do not call assertSearchResponse() before the assertions
-        // the reason is that this would trigger toXContent and we would like to check that this has no potential side effects
-        response = request.get();
-
-        classes = (StringTerms) response.getAggregations().get("class");
-        assertThat(classes.getBuckets().size(), equalTo(2));
-        for (Terms.Bucket classBucket : classes.getBuckets()) {
-            Map<String, Aggregation> aggs = classBucket.getAggregations().asMap();
-            assertTrue(aggs.containsKey("sig_terms"));
-            SignificantTerms agg = (SignificantTerms) aggs.get("sig_terms");
-            assertThat(agg.getBuckets().size(), equalTo(2));
-            Iterator<SignificantTerms.Bucket> bucketIterator = agg.iterator();
-            SignificantTerms.Bucket sigBucket = bucketIterator.next();
-            String term = sigBucket.getKeyAsString();
-            String classTerm = classBucket.getKeyAsString();
-            assertTrue(term.equals(classTerm));
-            assertThat(sigBucket.getSignificanceScore(), closeTo(2.0, 1.e-8));
-            sigBucket = bucketIterator.next();
-            assertThat(sigBucket.getSignificanceScore(), closeTo(1.0, 1.e-8));
-        }
-    }
-
-    public static class CustomSignificanceHeuristicPlugin extends MockScriptPlugin implements SearchPlugin {
-        @Override
-        public List<SignificanceHeuristicSpec<?>> getSignificanceHeuristics() {
-            return singletonList(new SignificanceHeuristicSpec<>(SimpleHeuristic.NAME, SimpleHeuristic::new, SimpleHeuristic.PARSER));
-        }
-
+    public static class TestScriptPlugin extends MockScriptPlugin implements SearchPlugin {
         @Override
         public Map<String, Function<Map<String, Object>, Object>> pluginScripts() {
             Map<String, Function<Map<String, Object>, Object>> scripts = new HashMap<>();
@@ -209,65 +122,6 @@ public class SignificantTermsSignificanceScoreIT extends ESIntegTestCase {
         }
     }

-    public static class SimpleHeuristic extends SignificanceHeuristic {
-        public static final String NAME = "simple";
-        public static final ObjectParser<SimpleHeuristic, Void> PARSER = new ObjectParser<>(NAME, SimpleHeuristic::new);
-
-        public SimpleHeuristic() {
-        }
-
-        /**
-         * Read from a stream.
-         */
-        public SimpleHeuristic(StreamInput in) throws IOException {
-            // Nothing to read
-        }
-
-        @Override
-        public void writeTo(StreamOutput out) throws IOException {
-            // Nothing to write
-        }
-
-        @Override
-        public String getWriteableName() {
-            return NAME;
-        }
-
-        @Override
-        public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
-            builder.startObject(NAME).endObject();
-            return builder;
-        }
-
-        @Override
-        public int hashCode() {
-            return 1;
-        }
-
-        @Override
-        public boolean equals(Object obj) {
-            if (obj == null) {
-                return false;
-            }
-            if (getClass() != obj.getClass()) {
-                return false;
-            }
-            return true;
-        }
-
-        /**
-         * @param subsetFreq The frequency of the term in the selected sample
-         * @param subsetSize The size of the selected sample (typically number of docs)
-         * @param supersetFreq The frequency of the term in the superset from which the sample was taken
-         * @param supersetSize The size of the superset from which the sample was taken (typically number of docs)
-         * @return a "significance" score
-         */
-        @Override
-        public double getScore(long subsetFreq, long subsetSize, long supersetFreq, long supersetSize) {
-            return subsetFreq / subsetSize > supersetFreq / supersetSize ? 2.0 : 1.0;
-        }
-    }
-
     public void testXContentResponse() throws Exception {
         String type = randomBoolean() ? "text" : "long";
         String settings = "{\"index.number_of_shards\": 1, \"index.number_of_replicas\": 0}";