This moves the testing of custom significance heuristic plugins from an `ESIntegTestCase` to an example plugin. This is *much* more "real" and can be used as an example for anyone that needs to actually build such a plugin. The old test had testing concerns and the example all jumbled together.
This commit is contained in:
parent
2353fe47fc
commit
f6c89b4599
|
@ -0,0 +1,28 @@
|
||||||
|
/*
|
||||||
|
* Licensed to Elasticsearch under one or more contributor
|
||||||
|
* license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright
|
||||||
|
* ownership. Elasticsearch licenses this file to you under
|
||||||
|
* the Apache License, Version 2.0 (the "License"); you may
|
||||||
|
* not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing,
|
||||||
|
* software distributed under the License is distributed on an
|
||||||
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
* KIND, either express or implied. See the License for the
|
||||||
|
* specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*/
|
||||||
|
// Build file for the example custom-significance-heuristic plugin.
// Uses the standard Elasticsearch plugin build support plus test clusters
// so the YAML REST tests can run against a real node.
apply plugin: 'elasticsearch.testclusters'
apply plugin: 'elasticsearch.esplugin'

esplugin {
    name 'custom-significance-heuristic'
    description 'An example plugin showing how to write and register a custom significance heuristic'
    classname 'org.elasticsearch.example.customsigheuristic.CustomSignificanceHeuristicPlugin'
    licenseFile rootProject.file('licenses/APACHE-LICENSE-2.0.txt')
    noticeFile rootProject.file('NOTICE.txt')
}
|
|
@ -0,0 +1,38 @@
|
||||||
|
/*
|
||||||
|
* Licensed to Elasticsearch under one or more contributor
|
||||||
|
* license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright
|
||||||
|
* ownership. Elasticsearch licenses this file to you under
|
||||||
|
* the Apache License, Version 2.0 (the "License"); you may
|
||||||
|
* not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing,
|
||||||
|
* software distributed under the License is distributed on an
|
||||||
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
* KIND, either express or implied. See the License for the
|
||||||
|
* specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.elasticsearch.example.customsigheuristic;
|
||||||
|
|
||||||
|
import org.elasticsearch.plugins.Plugin;
|
||||||
|
import org.elasticsearch.plugins.SearchPlugin;
|
||||||
|
import org.elasticsearch.search.aggregations.bucket.significant.heuristics.SignificanceHeuristic;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import static java.util.Collections.singletonList;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Plugin declaring a custom {@link SignificanceHeuristic}.
|
||||||
|
*/
|
||||||
|
public class CustomSignificanceHeuristicPlugin extends Plugin implements SearchPlugin {
|
||||||
|
@Override
|
||||||
|
public List<SignificanceHeuristicSpec<?>> getSignificanceHeuristics() {
|
||||||
|
return singletonList(new SignificanceHeuristicSpec<>(SimpleHeuristic.NAME, SimpleHeuristic::new, SimpleHeuristic.PARSER));
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,90 @@
|
||||||
|
/*
|
||||||
|
* Licensed to Elasticsearch under one or more contributor
|
||||||
|
* license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright
|
||||||
|
* ownership. Elasticsearch licenses this file to you under
|
||||||
|
* the Apache License, Version 2.0 (the "License"); you may
|
||||||
|
* not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing,
|
||||||
|
* software distributed under the License is distributed on an
|
||||||
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
* KIND, either express or implied. See the License for the
|
||||||
|
* specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.elasticsearch.example.customsigheuristic;
|
||||||
|
|
||||||
|
import org.elasticsearch.common.io.stream.StreamInput;
|
||||||
|
import org.elasticsearch.common.io.stream.StreamOutput;
|
||||||
|
import org.elasticsearch.common.xcontent.ObjectParser;
|
||||||
|
import org.elasticsearch.common.xcontent.XContentBuilder;
|
||||||
|
import org.elasticsearch.search.aggregations.bucket.significant.heuristics.SignificanceHeuristic;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A simple {@linkplain SignificanceHeuristic} used an example of declaring a custom heuristic.
|
||||||
|
*/
|
||||||
|
public class SimpleHeuristic extends SignificanceHeuristic {
|
||||||
|
public static final String NAME = "simple";
|
||||||
|
public static final ObjectParser<SimpleHeuristic, Void> PARSER = new ObjectParser<>(NAME, SimpleHeuristic::new);
|
||||||
|
|
||||||
|
public SimpleHeuristic() {
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Read from a stream.
|
||||||
|
*/
|
||||||
|
public SimpleHeuristic(StreamInput in) throws IOException {
|
||||||
|
// Nothing to read
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void writeTo(StreamOutput out) throws IOException {
|
||||||
|
// Nothing to write
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getWriteableName() {
|
||||||
|
return NAME;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
|
||||||
|
builder.startObject(NAME).endObject();
|
||||||
|
return builder;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int hashCode() {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean equals(Object obj) {
|
||||||
|
if (obj == null) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (getClass() != obj.getClass()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param subsetFreq The frequency of the term in the selected sample
|
||||||
|
* @param subsetSize The size of the selected sample (typically number of docs)
|
||||||
|
* @param supersetFreq The frequency of the term in the superset from which the sample was taken
|
||||||
|
* @param supersetSize The size of the superset from which the sample was taken (typically number of docs)
|
||||||
|
* @return a "significance" score
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public double getScore(long subsetFreq, long subsetSize, long supersetFreq, long supersetSize) {
|
||||||
|
return subsetFreq / subsetSize > supersetFreq / supersetSize ? 2.0 : 1.0;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,36 @@
|
||||||
|
/*
|
||||||
|
* Licensed to Elasticsearch under one or more contributor
|
||||||
|
* license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright
|
||||||
|
* ownership. Elasticsearch licenses this file to you under
|
||||||
|
* the Apache License, Version 2.0 (the "License"); you may
|
||||||
|
* not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing,
|
||||||
|
* software distributed under the License is distributed on an
|
||||||
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
* KIND, either express or implied. See the License for the
|
||||||
|
* specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.elasticsearch.example.customsigheuristic;
|
||||||
|
|
||||||
|
import com.carrotsearch.randomizedtesting.annotations.Name;
|
||||||
|
import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
|
||||||
|
import org.elasticsearch.test.rest.yaml.ClientYamlTestCandidate;
|
||||||
|
import org.elasticsearch.test.rest.yaml.ESClientYamlSuiteTestCase;
|
||||||
|
|
||||||
|
public class CustomSignificanceHeuristicClientYamlTestSuiteIT extends ESClientYamlSuiteTestCase {
|
||||||
|
public CustomSignificanceHeuristicClientYamlTestSuiteIT(@Name("yaml") ClientYamlTestCandidate testCandidate) {
|
||||||
|
super(testCandidate);
|
||||||
|
}
|
||||||
|
|
||||||
|
@ParametersFactory
|
||||||
|
public static Iterable<Object[]> parameters() throws Exception {
|
||||||
|
return ESClientYamlSuiteTestCase.createParameters();
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,57 @@
|
||||||
|
/*
|
||||||
|
* Licensed to Elasticsearch under one or more contributor
|
||||||
|
* license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright
|
||||||
|
* ownership. Elasticsearch licenses this file to you under
|
||||||
|
* the Apache License, Version 2.0 (the "License"); you may
|
||||||
|
* not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing,
|
||||||
|
* software distributed under the License is distributed on an
|
||||||
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
* KIND, either express or implied. See the License for the
|
||||||
|
* specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.elasticsearch.example.customsigheuristic;
|
||||||
|
|
||||||
|
import org.elasticsearch.common.io.stream.Writeable.Reader;
|
||||||
|
import org.elasticsearch.common.xcontent.XContentParser;
|
||||||
|
import org.elasticsearch.common.xcontent.XContentParser.Token;
|
||||||
|
import org.elasticsearch.test.AbstractSerializingTestCase;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import static org.hamcrest.Matchers.equalTo;
|
||||||
|
|
||||||
|
public class SimpleHeuristicWireTests extends AbstractSerializingTestCase<SimpleHeuristic> {
|
||||||
|
@Override
|
||||||
|
protected SimpleHeuristic doParseInstance(XContentParser parser) throws IOException {
|
||||||
|
/* Because Heuristics are XContent "fragments" we need to throw away
|
||||||
|
* the "extra" stuff before calling the parser. */
|
||||||
|
parser.nextToken();
|
||||||
|
assertThat(parser.currentToken(), equalTo(Token.START_OBJECT));
|
||||||
|
parser.nextToken();
|
||||||
|
assertThat(parser.currentToken(), equalTo(Token.FIELD_NAME));
|
||||||
|
assertThat(parser.currentName(), equalTo("simple"));
|
||||||
|
parser.nextToken();
|
||||||
|
SimpleHeuristic h = SimpleHeuristic.PARSER.apply(parser, null);
|
||||||
|
assertThat(parser.currentToken(), equalTo(Token.END_OBJECT));
|
||||||
|
parser.nextToken();
|
||||||
|
return h;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected Reader<SimpleHeuristic> instanceReader() {
|
||||||
|
return SimpleHeuristic::new;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected SimpleHeuristic createTestInstance() {
|
||||||
|
return new SimpleHeuristic();
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,16 @@
|
||||||
|
# tests that the custom significance heuristic plugin is installed
---
"plugin loaded":
  - skip:
      reason: "contains is a newly added assertion"
      features: contains

  # Get master node id
  - do:
      cluster.state: {}
  - set: { master_node: master }

  - do:
      nodes.info: {}

  # The plugin name must appear in the master node's installed-plugins list.
  - contains: { nodes.$master.plugins: { name: custom-significance-heuristic } }
|
|
@ -0,0 +1,121 @@
|
||||||
|
# Index a tiny two-class corpus and exercise the "simple" heuristic through
# significant_text and significant_terms (text and long fields). The simple
# heuristic scores 2.0 when a term is over-represented in the bucket, else 1.0.
setup:
  - do:
      indices.create:
        index: test
        body:
          mappings:
            properties:
              text:
                type: text
                fielddata: true
              long:
                type: long
  - do:
      bulk:
        refresh: true
        index: test
        body:
          - '{"index": {"_id": "1"}}'
          - '{"text": "foo", "long": 1, "class": 1}'
          - '{"index": {"_id": "2"}}'
          - '{"text": "foo", "long": 1, "class": 1}'
          - '{"index": {"_id": "3"}}'
          - '{"text": "bar", "long": 0, "class": 0}'
          - '{"index": {"_id": "4"}}'
          - '{"text": "bar", "long": 0, "class": 0}'
          - '{"index": {"_id": "5"}}'
          - '{"text": ["foo", "bar"], "long": [1, 0], "class": 1}'
          - '{"index": {"_id": "6"}}'
          - '{"text": ["foo", "bar"], "long": [1, 0], "class": 0}'
          - '{"index": {"_id": "7"}}'
          - '{"text": "bar", "long": 0, "class": 0}'

---
"test custom heuristic on significant_text":
  - do:
      search:
        index: test
        size: 0
        body:
          aggs:
            class:
              terms:
                field: class
                order: { _key: asc }
              aggs:
                sig:
                  significant_text:
                    field: text
                    simple: {}
                    min_doc_count: 1

  - match: { aggregations.class.buckets.0.key: 0 }
  - match: { aggregations.class.buckets.0.sig.buckets.0.key: bar }
  - match: { aggregations.class.buckets.0.sig.buckets.0.score: 2.0 }
  - match: { aggregations.class.buckets.0.sig.buckets.1.key: foo }
  - match: { aggregations.class.buckets.0.sig.buckets.1.score: 1.0 }
  - match: { aggregations.class.buckets.1.key: 1 }
  - match: { aggregations.class.buckets.1.sig.buckets.0.key: foo }
  - match: { aggregations.class.buckets.1.sig.buckets.0.score: 2.0 }
  - match: { aggregations.class.buckets.1.sig.buckets.1.key: bar }
  - match: { aggregations.class.buckets.1.sig.buckets.1.score: 1.0 }

---
"test custom heuristic on text significant_terms":
  - do:
      search:
        index: test
        size: 0
        body:
          aggs:
            class:
              terms:
                field: class
                order: { _key: asc }
              aggs:
                sig:
                  significant_terms:
                    field: text
                    simple: {}
                    min_doc_count: 1

  - match: { aggregations.class.buckets.0.key: 0 }
  - match: { aggregations.class.buckets.0.sig.buckets.0.key: bar }
  - match: { aggregations.class.buckets.0.sig.buckets.0.score: 2.0 }
  - match: { aggregations.class.buckets.0.sig.buckets.1.key: foo }
  - match: { aggregations.class.buckets.0.sig.buckets.1.score: 1.0 }
  - match: { aggregations.class.buckets.1.key: 1 }
  - match: { aggregations.class.buckets.1.sig.buckets.0.key: foo }
  - match: { aggregations.class.buckets.1.sig.buckets.0.score: 2.0 }
  - match: { aggregations.class.buckets.1.sig.buckets.1.key: bar }
  - match: { aggregations.class.buckets.1.sig.buckets.1.score: 1.0 }

---
"test custom heuristic on long significant_terms":
  - do:
      search:
        index: test
        size: 0
        body:
          aggs:
            class:
              terms:
                field: class
                order: { _key: asc }
              aggs:
                sig:
                  significant_terms:
                    field: long
                    simple: {}
                    min_doc_count: 1

  - match: { aggregations.class.buckets.0.key: 0 }
  - match: { aggregations.class.buckets.0.sig.buckets.0.key: 0 }
  - match: { aggregations.class.buckets.0.sig.buckets.0.score: 2.0 }
  - match: { aggregations.class.buckets.0.sig.buckets.1.key: 1 }
  - match: { aggregations.class.buckets.0.sig.buckets.1.score: 1.0 }
  - match: { aggregations.class.buckets.1.key: 1 }
  - match: { aggregations.class.buckets.1.sig.buckets.0.key: 1 }
  - match: { aggregations.class.buckets.1.sig.buckets.0.score: 2.0 }
  - match: { aggregations.class.buckets.1.sig.buckets.1.key: 0 }
  - match: { aggregations.class.buckets.1.sig.buckets.1.score: 1.0 }
|
@ -22,10 +22,7 @@ import org.elasticsearch.action.index.IndexRequestBuilder;
|
||||||
import org.elasticsearch.action.search.SearchRequestBuilder;
|
import org.elasticsearch.action.search.SearchRequestBuilder;
|
||||||
import org.elasticsearch.action.search.SearchResponse;
|
import org.elasticsearch.action.search.SearchResponse;
|
||||||
import org.elasticsearch.common.Strings;
|
import org.elasticsearch.common.Strings;
|
||||||
import org.elasticsearch.common.io.stream.StreamInput;
|
|
||||||
import org.elasticsearch.common.io.stream.StreamOutput;
|
|
||||||
import org.elasticsearch.common.settings.Settings;
|
import org.elasticsearch.common.settings.Settings;
|
||||||
import org.elasticsearch.common.xcontent.ObjectParser;
|
|
||||||
import org.elasticsearch.common.xcontent.ToXContent;
|
import org.elasticsearch.common.xcontent.ToXContent;
|
||||||
import org.elasticsearch.common.xcontent.XContentBuilder;
|
import org.elasticsearch.common.xcontent.XContentBuilder;
|
||||||
import org.elasticsearch.common.xcontent.XContentFactory;
|
import org.elasticsearch.common.xcontent.XContentFactory;
|
||||||
|
@ -65,7 +62,6 @@ import java.util.Map;
|
||||||
import java.util.concurrent.ExecutionException;
|
import java.util.concurrent.ExecutionException;
|
||||||
import java.util.function.Function;
|
import java.util.function.Function;
|
||||||
|
|
||||||
import static java.util.Collections.singletonList;
|
|
||||||
import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_REPLICAS;
|
import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_REPLICAS;
|
||||||
import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_SHARDS;
|
import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_SHARDS;
|
||||||
import static org.elasticsearch.search.aggregations.AggregationBuilders.filter;
|
import static org.elasticsearch.search.aggregations.AggregationBuilders.filter;
|
||||||
|
@ -89,97 +85,14 @@ public class SignificantTermsSignificanceScoreIT extends ESIntegTestCase {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected Collection<Class<? extends Plugin>> nodePlugins() {
|
protected Collection<Class<? extends Plugin>> nodePlugins() {
|
||||||
return Arrays.asList(CustomSignificanceHeuristicPlugin.class);
|
return Arrays.asList(TestScriptPlugin.class);
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
protected Collection<Class<? extends Plugin>> transportClientPlugins() {
|
|
||||||
return Arrays.asList(CustomSignificanceHeuristicPlugin.class);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public String randomExecutionHint() {
|
public String randomExecutionHint() {
|
||||||
return randomBoolean() ? null : randomFrom(SignificantTermsAggregatorFactory.ExecutionMode.values()).toString();
|
return randomBoolean() ? null : randomFrom(SignificantTermsAggregatorFactory.ExecutionMode.values()).toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testPlugin() throws Exception {
|
public static class TestScriptPlugin extends MockScriptPlugin implements SearchPlugin {
|
||||||
String type = randomBoolean() ? "text" : "long";
|
|
||||||
String settings = "{\"index.number_of_shards\": 1, \"index.number_of_replicas\": 0}";
|
|
||||||
SharedSignificantTermsTestMethods.index01Docs(type, settings, this);
|
|
||||||
SearchRequestBuilder request;
|
|
||||||
if ("text".equals(type) && randomBoolean()) {
|
|
||||||
// Use significant_text on text fields but occasionally run with alternative of
|
|
||||||
// significant_terms on legacy fieldData=true too.
|
|
||||||
request = client().prepareSearch(INDEX_NAME).setTypes(DOC_TYPE)
|
|
||||||
.addAggregation(
|
|
||||||
terms("class")
|
|
||||||
.field(CLASS_FIELD)
|
|
||||||
.subAggregation((significantText("sig_terms", TEXT_FIELD))
|
|
||||||
.significanceHeuristic(new SimpleHeuristic())
|
|
||||||
.minDocCount(1)
|
|
||||||
)
|
|
||||||
);
|
|
||||||
}else
|
|
||||||
{
|
|
||||||
request = client().prepareSearch(INDEX_NAME).setTypes(DOC_TYPE)
|
|
||||||
.addAggregation(
|
|
||||||
terms("class")
|
|
||||||
.field(CLASS_FIELD)
|
|
||||||
.subAggregation((significantTerms("sig_terms"))
|
|
||||||
.field(TEXT_FIELD)
|
|
||||||
.significanceHeuristic(new SimpleHeuristic())
|
|
||||||
.minDocCount(1)
|
|
||||||
)
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
SearchResponse response = request.get();
|
|
||||||
assertSearchResponse(response);
|
|
||||||
StringTerms classes = response.getAggregations().get("class");
|
|
||||||
assertThat(classes.getBuckets().size(), equalTo(2));
|
|
||||||
for (Terms.Bucket classBucket : classes.getBuckets()) {
|
|
||||||
Map<String, Aggregation> aggs = classBucket.getAggregations().asMap();
|
|
||||||
assertTrue(aggs.containsKey("sig_terms"));
|
|
||||||
SignificantTerms agg = (SignificantTerms) aggs.get("sig_terms");
|
|
||||||
assertThat(agg.getBuckets().size(), equalTo(2));
|
|
||||||
Iterator<SignificantTerms.Bucket> bucketIterator = agg.iterator();
|
|
||||||
SignificantTerms.Bucket sigBucket = bucketIterator.next();
|
|
||||||
String term = sigBucket.getKeyAsString();
|
|
||||||
String classTerm = classBucket.getKeyAsString();
|
|
||||||
assertTrue(term.equals(classTerm));
|
|
||||||
assertThat(sigBucket.getSignificanceScore(), closeTo(2.0, 1.e-8));
|
|
||||||
sigBucket = bucketIterator.next();
|
|
||||||
assertThat(sigBucket.getSignificanceScore(), closeTo(1.0, 1.e-8));
|
|
||||||
}
|
|
||||||
|
|
||||||
// we run the same test again but this time we do not call assertSearchResponse() before the assertions
|
|
||||||
// the reason is that this would trigger toXContent and we would like to check that this has no potential side effects
|
|
||||||
|
|
||||||
response = request.get();
|
|
||||||
|
|
||||||
classes = (StringTerms) response.getAggregations().get("class");
|
|
||||||
assertThat(classes.getBuckets().size(), equalTo(2));
|
|
||||||
for (Terms.Bucket classBucket : classes.getBuckets()) {
|
|
||||||
Map<String, Aggregation> aggs = classBucket.getAggregations().asMap();
|
|
||||||
assertTrue(aggs.containsKey("sig_terms"));
|
|
||||||
SignificantTerms agg = (SignificantTerms) aggs.get("sig_terms");
|
|
||||||
assertThat(agg.getBuckets().size(), equalTo(2));
|
|
||||||
Iterator<SignificantTerms.Bucket> bucketIterator = agg.iterator();
|
|
||||||
SignificantTerms.Bucket sigBucket = bucketIterator.next();
|
|
||||||
String term = sigBucket.getKeyAsString();
|
|
||||||
String classTerm = classBucket.getKeyAsString();
|
|
||||||
assertTrue(term.equals(classTerm));
|
|
||||||
assertThat(sigBucket.getSignificanceScore(), closeTo(2.0, 1.e-8));
|
|
||||||
sigBucket = bucketIterator.next();
|
|
||||||
assertThat(sigBucket.getSignificanceScore(), closeTo(1.0, 1.e-8));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public static class CustomSignificanceHeuristicPlugin extends MockScriptPlugin implements SearchPlugin {
|
|
||||||
@Override
|
|
||||||
public List<SignificanceHeuristicSpec<?>> getSignificanceHeuristics() {
|
|
||||||
return singletonList(new SignificanceHeuristicSpec<>(SimpleHeuristic.NAME, SimpleHeuristic::new, SimpleHeuristic.PARSER));
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Map<String, Function<Map<String, Object>, Object>> pluginScripts() {
|
public Map<String, Function<Map<String, Object>, Object>> pluginScripts() {
|
||||||
Map<String, Function<Map<String, Object>, Object>> scripts = new HashMap<>();
|
Map<String, Function<Map<String, Object>, Object>> scripts = new HashMap<>();
|
||||||
|
@ -209,65 +122,6 @@ public class SignificantTermsSignificanceScoreIT extends ESIntegTestCase {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public static class SimpleHeuristic extends SignificanceHeuristic {
|
|
||||||
public static final String NAME = "simple";
|
|
||||||
public static final ObjectParser<SimpleHeuristic, Void> PARSER = new ObjectParser<>(NAME, SimpleHeuristic::new);
|
|
||||||
|
|
||||||
public SimpleHeuristic() {
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Read from a stream.
|
|
||||||
*/
|
|
||||||
public SimpleHeuristic(StreamInput in) throws IOException {
|
|
||||||
// Nothing to read
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void writeTo(StreamOutput out) throws IOException {
|
|
||||||
// Nothing to write
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String getWriteableName() {
|
|
||||||
return NAME;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
|
|
||||||
builder.startObject(NAME).endObject();
|
|
||||||
return builder;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int hashCode() {
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean equals(Object obj) {
|
|
||||||
if (obj == null) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
if (getClass() != obj.getClass()) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @param subsetFreq The frequency of the term in the selected sample
|
|
||||||
* @param subsetSize The size of the selected sample (typically number of docs)
|
|
||||||
* @param supersetFreq The frequency of the term in the superset from which the sample was taken
|
|
||||||
* @param supersetSize The size of the superset from which the sample was taken (typically number of docs)
|
|
||||||
* @return a "significance" score
|
|
||||||
*/
|
|
||||||
@Override
|
|
||||||
public double getScore(long subsetFreq, long subsetSize, long supersetFreq, long supersetSize) {
|
|
||||||
return subsetFreq / subsetSize > supersetFreq / supersetSize ? 2.0 : 1.0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public void testXContentResponse() throws Exception {
|
public void testXContentResponse() throws Exception {
|
||||||
String type = randomBoolean() ? "text" : "long";
|
String type = randomBoolean() ? "text" : "long";
|
||||||
String settings = "{\"index.number_of_shards\": 1, \"index.number_of_replicas\": 0}";
|
String settings = "{\"index.number_of_shards\": 1, \"index.number_of_replicas\": 0}";
|
||||||
|
|
Loading…
Reference in New Issue