Clean up significant terms aggregation results

* Clean up the generics around significant terms aggregation results
* Reduce code duplicated between `SignificantLongTerms` and
`SignificantStringTerms` by creating `InternalMappedSignificantTerms`
and moving common things there where possible.
* Migrate to `NamedWriteable`
* Line length fixes while I was there
This commit is contained in:
Nik Everett 2016-07-07 22:06:36 -04:00
parent 920bd0cf68
commit f479219ca7
16 changed files with 505 additions and 422 deletions

View File

@ -583,12 +583,8 @@
<suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]search[/\\]aggregations[/\\]bucket[/\\]sampler[/\\]InternalSampler.java" checks="LineLength" />
<suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]search[/\\]aggregations[/\\]bucket[/\\]sampler[/\\]SamplerAggregator.java" checks="LineLength" />
<suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]search[/\\]aggregations[/\\]bucket[/\\]significant[/\\]GlobalOrdinalsSignificantTermsAggregator.java" checks="LineLength" />
<suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]search[/\\]aggregations[/\\]bucket[/\\]significant[/\\]InternalSignificantTerms.java" checks="LineLength" />
<suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]search[/\\]aggregations[/\\]bucket[/\\]significant[/\\]SignificantLongTerms.java" checks="LineLength" />
<suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]search[/\\]aggregations[/\\]bucket[/\\]significant[/\\]SignificantStringTerms.java" checks="LineLength" />
<suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]search[/\\]aggregations[/\\]bucket[/\\]significant[/\\]SignificantTermsAggregatorFactory.java" checks="LineLength" />
<suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]search[/\\]aggregations[/\\]bucket[/\\]significant[/\\]SignificantTermsParametersParser.java" checks="LineLength" />
<suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]search[/\\]aggregations[/\\]bucket[/\\]significant[/\\]UnmappedSignificantTerms.java" checks="LineLength" />
<suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]search[/\\]aggregations[/\\]bucket[/\\]significant[/\\]heuristics[/\\]GND.java" checks="LineLength" />
<suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]search[/\\]aggregations[/\\]bucket[/\\]significant[/\\]heuristics[/\\]NXYSignificanceHeuristic.java" checks="LineLength" />
<suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]search[/\\]aggregations[/\\]bucket[/\\]significant[/\\]heuristics[/\\]PercentageScore.java" checks="LineLength" />
@ -1044,7 +1040,6 @@
<suppress files="core[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]search[/\\]aggregations[/\\]bucket[/\\]TermsDocCountErrorIT.java" checks="LineLength" />
<suppress files="core[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]search[/\\]aggregations[/\\]bucket[/\\]TermsShardMinDocCountIT.java" checks="LineLength" />
<suppress files="core[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]search[/\\]aggregations[/\\]bucket[/\\]nested[/\\]NestedAggregatorTests.java" checks="LineLength" />
<suppress files="core[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]search[/\\]aggregations[/\\]bucket[/\\]significant[/\\]SignificanceHeuristicTests.java" checks="LineLength" />
<suppress files="core[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]search[/\\]aggregations[/\\]metrics[/\\]AbstractGeoTestCase.java" checks="LineLength" />
<suppress files="core[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]search[/\\]aggregations[/\\]metrics[/\\]AvgIT.java" checks="LineLength" />
<suppress files="core[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]search[/\\]aggregations[/\\]metrics[/\\]SumIT.java" checks="LineLength" />

View File

@ -526,12 +526,14 @@ public class SearchModule extends AbstractModule {
.addResultReader(UnmappedTerms.NAME, UnmappedTerms::new)
.addResultReader(LongTerms.NAME, LongTerms::new)
.addResultReader(DoubleTerms.NAME, DoubleTerms::new));
registerAggregation(SignificantTermsAggregationBuilder::new,
registerAggregation(new AggregationSpec(SignificantTermsAggregationBuilder::new,
new SignificantTermsParser(significanceHeuristicParserRegistry, queryParserRegistry),
SignificantTermsAggregationBuilder.AGGREGATION_NAME_FIELD);
registerAggregation(
new AggregationSpec(RangeAggregationBuilder::new, new RangeParser(), RangeAggregationBuilder.AGGREGATION_NAME_FIELD)
.addResultReader(InternalRange::new));
SignificantTermsAggregationBuilder.AGGREGATION_NAME_FIELD)
.addResultReader(SignificantStringTerms.NAME, SignificantStringTerms::new)
.addResultReader(SignificantLongTerms.NAME, SignificantLongTerms::new)
.addResultReader(UnmappedSignificantTerms.NAME, UnmappedSignificantTerms::new));
registerAggregation(new AggregationSpec(RangeAggregationBuilder::new, new RangeParser(),
RangeAggregationBuilder.AGGREGATION_NAME_FIELD).addResultReader(InternalRange::new));
registerAggregation(new AggregationSpec(DateRangeAggregationBuilder::new, new DateRangeParser(),
DateRangeAggregationBuilder.AGGREGATION_NAME_FIELD).addResultReader(InternalDateRange::new));
registerAggregation(IpRangeAggregationBuilder::new, new IpRangeParser(), IpRangeAggregationBuilder.AGGREGATION_NAME_FIELD);
@ -818,9 +820,6 @@ public class SearchModule extends AbstractModule {
static {
// buckets
SignificantStringTerms.registerStreams();
SignificantLongTerms.registerStreams();
UnmappedSignificantTerms.registerStreams();
InternalGeoHashGrid.registerStreams();
InternalBinaryRange.registerStream();
InternalHistogram.registerStream();

View File

@ -21,7 +21,7 @@ package org.elasticsearch.search.aggregations.bucket.significant;
import org.apache.lucene.util.PriorityQueue;
public class BucketSignificancePriorityQueue extends PriorityQueue<SignificantTerms.Bucket> {
public class BucketSignificancePriorityQueue<B extends SignificantTerms.Bucket> extends PriorityQueue<B> {
public BucketSignificancePriorityQueue(int size) {
super(size);

View File

@ -38,10 +38,11 @@ import org.elasticsearch.search.internal.ContextIndexSearcher;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import static java.util.Collections.emptyList;
/**
* A global ordinal based implementation of significant terms, based on {@link SignificantStringTermsAggregator}.
*/
@ -94,7 +95,7 @@ public class GlobalOrdinalsSignificantTermsAggregator extends GlobalOrdinalsStri
long supersetSize = termsAggFactory.getSupersetNumDocs();
long subsetSize = numCollectedDocs;
BucketSignificancePriorityQueue ordered = new BucketSignificancePriorityQueue(size);
BucketSignificancePriorityQueue<SignificantStringTerms.Bucket> ordered = new BucketSignificancePriorityQueue<>(size);
SignificantStringTerms.Bucket spare = null;
for (long globalTermOrd = 0; globalTermOrd < globalOrds.getValueCount(); ++globalTermOrd) {
if (includeExclude != null && !acceptedGlobalOrdinals.get(globalTermOrd)) {
@ -123,21 +124,20 @@ public class GlobalOrdinalsSignificantTermsAggregator extends GlobalOrdinalsStri
// Back at the central reducer these properties will be updated with
// global stats
spare.updateScore(significanceHeuristic);
spare = (SignificantStringTerms.Bucket) ordered.insertWithOverflow(spare);
spare = ordered.insertWithOverflow(spare);
}
final InternalSignificantTerms.Bucket[] list = new InternalSignificantTerms.Bucket[ordered.size()];
final SignificantStringTerms.Bucket[] list = new SignificantStringTerms.Bucket[ordered.size()];
for (int i = ordered.size() - 1; i >= 0; i--) {
final SignificantStringTerms.Bucket bucket = (SignificantStringTerms.Bucket) ordered.pop();
final SignificantStringTerms.Bucket bucket = ordered.pop();
// the terms are owned by the BytesRefHash, we need to pull a copy since the BytesRef hash data may be recycled at some point
bucket.termBytes = BytesRef.deepCopyOf(bucket.termBytes);
bucket.aggregations = bucketAggregations(bucket.bucketOrd);
list[i] = bucket;
}
return new SignificantStringTerms(subsetSize, supersetSize, name, format, bucketCountThresholds.getRequiredSize(),
bucketCountThresholds.getMinDocCount(), significanceHeuristic, Arrays.asList(list), pipelineAggregators(),
metaData());
return new SignificantStringTerms(name, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getMinDocCount(),
pipelineAggregators(), metaData(), format, subsetSize, supersetSize, significanceHeuristic, Arrays.asList(list));
}
@Override
@ -146,9 +146,8 @@ public class GlobalOrdinalsSignificantTermsAggregator extends GlobalOrdinalsStri
ContextIndexSearcher searcher = context.searchContext().searcher();
IndexReader topReader = searcher.getIndexReader();
int supersetSize = topReader.numDocs();
return new SignificantStringTerms(0, supersetSize, name, format, bucketCountThresholds.getRequiredSize(),
bucketCountThresholds.getMinDocCount(), significanceHeuristic,
Collections.<InternalSignificantTerms.Bucket> emptyList(), pipelineAggregators(), metaData());
return new SignificantStringTerms(name, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getMinDocCount(),
pipelineAggregators(), metaData(), format, 0, supersetSize, significanceHeuristic, emptyList());
}
@Override

View File

@ -0,0 +1,102 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.search.aggregations.bucket.significant;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.search.DocValueFormat;
import org.elasticsearch.search.aggregations.bucket.significant.heuristics.SignificanceHeuristic;
import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator;
import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.function.Function;
import java.util.stream.Collectors;
/**
 * Common base for significant terms results that hold their own buckets. It keeps the state
 * shared by {@link SignificantLongTerms} and {@link SignificantStringTerms}: the value format,
 * the subset and superset sizes, the significance heuristic and the bucket list.
 *
 * @param <A> concrete aggregation type
 * @param <B> concrete bucket type
 */
public abstract class InternalMappedSignificantTerms<
            A extends InternalMappedSignificantTerms<A, B>,
            B extends InternalSignificantTerms.Bucket<B>>
        extends InternalSignificantTerms<A, B> {

    protected final DocValueFormat format;
    protected final long subsetSize;
    protected final long supersetSize;
    protected final SignificanceHeuristic significanceHeuristic;
    protected final List<B> buckets;
    /** Lookup of buckets by their string key; built on the first call to {@link #getBucketByKey(String)}. */
    protected Map<String, B> bucketMap;

    /**
     * Build the result from already computed values.
     */
    protected InternalMappedSignificantTerms(String name, int requiredSize, long minDocCount, List<PipelineAggregator> pipelineAggregators,
            Map<String, Object> metaData, DocValueFormat format, long subsetSize, long supersetSize,
            SignificanceHeuristic significanceHeuristic, List<B> buckets) {
        super(name, requiredSize, minDocCount, pipelineAggregators, metaData);
        this.format = format;
        this.subsetSize = subsetSize;
        this.supersetSize = supersetSize;
        this.significanceHeuristic = significanceHeuristic;
        this.buckets = buckets;
    }

    /**
     * Read from a stream. Fields are consumed in the same order that
     * {@link #writeTermTypeInfoTo(StreamOutput)} writes them.
     *
     * @param in stream to read from
     * @param bucketReader reads a single bucket, given the sizes and format read just before it
     */
    protected InternalMappedSignificantTerms(StreamInput in, Bucket.Reader<B> bucketReader) throws IOException {
        super(in);
        format = in.readNamedWriteable(DocValueFormat.class);
        subsetSize = in.readVLong();
        supersetSize = in.readVLong();
        significanceHeuristic = in.readNamedWriteable(SignificanceHeuristic.class);
        // Every bucket is handed the sizes and format that were read above.
        buckets = in.readList(bucketIn -> bucketReader.read(bucketIn, subsetSize, supersetSize, format));
    }

    /**
     * Write the format, subset size, superset size, heuristic and buckets, in that order.
     */
    @Override
    protected final void writeTermTypeInfoTo(StreamOutput out) throws IOException {
        out.writeNamedWriteable(format);
        out.writeVLong(subsetSize);
        out.writeVLong(supersetSize);
        out.writeNamedWriteable(significanceHeuristic);
        out.writeList(buckets);
    }

    @Override
    protected List<B> getBucketsInternal() {
        return buckets;
    }

    /**
     * Look up a bucket by its string key. The key-to-bucket map is built on the first call and
     * reused afterwards.
     */
    @Override
    public B getBucketByKey(String term) {
        Map<String, B> byKey = bucketMap;
        if (byKey == null) {
            // Collectors.toMap rejects duplicate keys with an IllegalStateException.
            byKey = buckets.stream().collect(Collectors.toMap(bucket -> bucket.getKeyAsString(), Function.identity()));
            bucketMap = byKey;
        }
        return byKey.get(term);
    }

    @Override
    protected long getSubsetSize() {
        return subsetSize;
    }

    @Override
    protected long getSupersetSize() {
        return supersetSize;
    }

    @Override
    protected SignificanceHeuristic getSignificanceHeuristic() {
        return significanceHeuristic;
    }
}

View File

@ -18,7 +18,8 @@
*/
package org.elasticsearch.search.aggregations.bucket.significant;
import org.elasticsearch.common.io.stream.Streamable;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.xcontent.ToXContent;
import org.elasticsearch.search.DocValueFormat;
import org.elasticsearch.search.aggregations.Aggregations;
@ -28,44 +29,36 @@ import org.elasticsearch.search.aggregations.InternalMultiBucketAggregation;
import org.elasticsearch.search.aggregations.bucket.significant.heuristics.SignificanceHeuristic;
import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import static java.util.Collections.unmodifiableList;
/**
*
* Result of the significant terms aggregation.
*/
public abstract class InternalSignificantTerms<A extends InternalSignificantTerms, B extends InternalSignificantTerms.Bucket> extends
InternalMultiBucketAggregation<A, B> implements SignificantTerms, ToXContent, Streamable {
protected SignificanceHeuristic significanceHeuristic;
protected int requiredSize;
protected long minDocCount;
protected List<? extends Bucket> buckets;
protected Map<String, Bucket> bucketMap;
protected long subsetSize;
protected long supersetSize;
protected InternalSignificantTerms() {} // for serialization
public abstract class InternalSignificantTerms<A extends InternalSignificantTerms<A, B>, B extends InternalSignificantTerms.Bucket<B>>
extends InternalMultiBucketAggregation<A, B> implements SignificantTerms, ToXContent {
@SuppressWarnings("PMD.ConstructorCallsOverridableMethod")
public abstract static class Bucket extends SignificantTerms.Bucket {
public abstract static class Bucket<B extends Bucket<B>> extends SignificantTerms.Bucket {
/**
* Reads a bucket. Should be a constructor reference.
*/
@FunctionalInterface
public interface Reader<B extends Bucket<B>> {
B read(StreamInput in, long subsetSize, long supersetSize, DocValueFormat format) throws IOException;
}
long bucketOrd;
protected InternalAggregations aggregations;
double score;
final transient DocValueFormat format;
protected Bucket(long subsetSize, long supersetSize, DocValueFormat format) {
// for serialization
super(subsetSize, supersetSize);
this.format = format;
}
protected Bucket(long subsetDf, long subsetSize, long supersetDf, long supersetSize,
InternalAggregations aggregations, DocValueFormat format) {
super(subsetDf, subsetSize, supersetDf, supersetSize);
@ -73,6 +66,14 @@ public abstract class InternalSignificantTerms<A extends InternalSignificantTerm
this.format = format;
}
/**
* Read from a stream.
*/
protected Bucket(StreamInput in, long subsetSize, long supersetSize, DocValueFormat format) {
super(in, subsetSize, supersetSize);
this.format = format;
}
@Override
public long getSubsetDf() {
return subsetDf;
@ -107,11 +108,11 @@ public abstract class InternalSignificantTerms<A extends InternalSignificantTerm
return aggregations;
}
public Bucket reduce(List<? extends Bucket> buckets, ReduceContext context) {
public B reduce(List<B> buckets, ReduceContext context) {
long subsetDf = 0;
long supersetDf = 0;
List<InternalAggregations> aggregationsList = new ArrayList<>(buckets.size());
for (Bucket bucket : buckets) {
for (B bucket : buckets) {
subsetDf += bucket.subsetDf;
supersetDf += bucket.supersetDf;
aggregationsList.add(bucket.aggregations);
@ -120,7 +121,7 @@ public abstract class InternalSignificantTerms<A extends InternalSignificantTerm
return newBucket(subsetDf, subsetSize, supersetDf, supersetSize, aggs);
}
abstract Bucket newBucket(long subsetDf, long subsetSize, long supersetDf, long supersetSize, InternalAggregations aggregations);
abstract B newBucket(long subsetDf, long subsetSize, long supersetDf, long supersetSize, InternalAggregations aggregations);
@Override
public double getSignificanceScore() {
@ -128,90 +129,102 @@ public abstract class InternalSignificantTerms<A extends InternalSignificantTerm
}
}
protected DocValueFormat format;
protected final int requiredSize;
protected final long minDocCount;
protected InternalSignificantTerms(long subsetSize, long supersetSize, String name, DocValueFormat format, int requiredSize,
long minDocCount, SignificanceHeuristic significanceHeuristic, List<? extends Bucket> buckets,
List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData) {
protected InternalSignificantTerms(String name, int requiredSize, long minDocCount, List<PipelineAggregator> pipelineAggregators,
Map<String, Object> metaData) {
super(name, pipelineAggregators, metaData);
this.requiredSize = requiredSize;
this.minDocCount = minDocCount;
this.buckets = buckets;
this.subsetSize = subsetSize;
this.supersetSize = supersetSize;
this.significanceHeuristic = significanceHeuristic;
this.format = Objects.requireNonNull(format);
}
/**
* Read from a stream.
*/
protected InternalSignificantTerms(StreamInput in) throws IOException {
super(in);
requiredSize = readSize(in);
minDocCount = in.readVLong();
}
protected final void doWriteTo(StreamOutput out) throws IOException {
writeSize(requiredSize, out);
out.writeVLong(minDocCount);
writeTermTypeInfoTo(out);
}
protected abstract void writeTermTypeInfoTo(StreamOutput out) throws IOException;
@Override
public Iterator<SignificantTerms.Bucket> iterator() {
Object o = buckets.iterator();
return (Iterator<SignificantTerms.Bucket>) o;
return getBuckets().iterator();
}
@Override
public List<SignificantTerms.Bucket> getBuckets() {
Object o = buckets;
return (List<SignificantTerms.Bucket>) o;
return unmodifiableList(getBucketsInternal());
}
@Override
public SignificantTerms.Bucket getBucketByKey(String term) {
if (bucketMap == null) {
bucketMap = new HashMap<>(buckets.size());
for (Bucket bucket : buckets) {
bucketMap.put(bucket.getKeyAsString(), bucket);
}
}
return bucketMap.get(term);
}
protected abstract List<B> getBucketsInternal();
@Override
public InternalAggregation doReduce(List<InternalAggregation> aggregations, ReduceContext reduceContext) {
long globalSubsetSize = 0;
long globalSupersetSize = 0;
// Compute the overall result set size and the corpus size using the
// top-level Aggregations from each shard
for (InternalAggregation aggregation : aggregations) {
@SuppressWarnings("unchecked")
InternalSignificantTerms<A, B> terms = (InternalSignificantTerms<A, B>) aggregation;
globalSubsetSize += terms.subsetSize;
globalSupersetSize += terms.supersetSize;
globalSubsetSize += terms.getSubsetSize();
globalSupersetSize += terms.getSupersetSize();
}
Map<String, List<InternalSignificantTerms.Bucket>> buckets = new HashMap<>();
Map<String, List<B>> buckets = new HashMap<>();
for (InternalAggregation aggregation : aggregations) {
@SuppressWarnings("unchecked")
InternalSignificantTerms<A, B> terms = (InternalSignificantTerms<A, B>) aggregation;
for (Bucket bucket : terms.buckets) {
List<Bucket> existingBuckets = buckets.get(bucket.getKeyAsString());
for (B bucket : terms.getBucketsInternal()) {
List<B> existingBuckets = buckets.get(bucket.getKeyAsString());
if (existingBuckets == null) {
existingBuckets = new ArrayList<>(aggregations.size());
buckets.put(bucket.getKeyAsString(), existingBuckets);
}
// Adjust the buckets with the global stats representing the
// total size of the pots from which the stats are drawn
existingBuckets.add(bucket.newBucket(bucket.getSubsetDf(), globalSubsetSize, bucket.getSupersetDf(), globalSupersetSize, bucket.aggregations));
existingBuckets.add(bucket.newBucket(bucket.getSubsetDf(), globalSubsetSize, bucket.getSupersetDf(), globalSupersetSize,
bucket.aggregations));
}
}
significanceHeuristic.initialize(reduceContext);
getSignificanceHeuristic().initialize(reduceContext);
final int size = Math.min(requiredSize, buckets.size());
BucketSignificancePriorityQueue ordered = new BucketSignificancePriorityQueue(size);
for (Map.Entry<String, List<Bucket>> entry : buckets.entrySet()) {
List<Bucket> sameTermBuckets = entry.getValue();
final Bucket b = sameTermBuckets.get(0).reduce(sameTermBuckets, reduceContext);
b.updateScore(significanceHeuristic);
BucketSignificancePriorityQueue<B> ordered = new BucketSignificancePriorityQueue<>(size);
for (Map.Entry<String, List<B>> entry : buckets.entrySet()) {
List<B> sameTermBuckets = entry.getValue();
final B b = sameTermBuckets.get(0).reduce(sameTermBuckets, reduceContext);
b.updateScore(getSignificanceHeuristic());
if ((b.score > 0) && (b.subsetDf >= minDocCount)) {
ordered.insertWithOverflow(b);
}
}
Bucket[] list = new Bucket[ordered.size()];
B[] list = createBucketsArray(ordered.size());
for (int i = ordered.size() - 1; i >= 0; i--) {
list[i] = (Bucket) ordered.pop();
list[i] = ordered.pop();
}
return create(globalSubsetSize, globalSupersetSize, Arrays.asList(list), this);
return create(globalSubsetSize, globalSupersetSize, Arrays.asList(list));
}
protected abstract A create(long subsetSize, long supersetSize, List<InternalSignificantTerms.Bucket> buckets,
InternalSignificantTerms prototype);
protected abstract A create(long subsetSize, long supersetSize, List<B> buckets);
/**
* Create an array to hold some buckets. Used in collecting the results.
*/
protected abstract B[] createBucketsArray(int size);
protected abstract long getSubsetSize();
protected abstract long getSupersetSize();
protected abstract SignificanceHeuristic getSignificanceHeuristic();
}

View File

@ -22,56 +22,54 @@ import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.search.DocValueFormat;
import org.elasticsearch.search.aggregations.AggregationStreams;
import org.elasticsearch.search.aggregations.InternalAggregations;
import org.elasticsearch.search.aggregations.bucket.significant.heuristics.SignificanceHeuristic;
import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
/**
*
* Result of running the significant terms aggregation on a numeric field.
*/
public class SignificantLongTerms extends InternalSignificantTerms<SignificantLongTerms, SignificantLongTerms.Bucket> {
public class SignificantLongTerms extends InternalMappedSignificantTerms<SignificantLongTerms, SignificantLongTerms.Bucket> {
public static final String NAME = "siglterms";
public static final Type TYPE = new Type("significant_terms", "siglterms");
public static final AggregationStreams.Stream STREAM = new AggregationStreams.Stream() {
@Override
public SignificantLongTerms readResult(StreamInput in) throws IOException {
SignificantLongTerms buckets = new SignificantLongTerms();
buckets.readFrom(in);
return buckets;
}
};
public static void registerStreams() {
AggregationStreams.registerStream(STREAM, TYPE.stream());
}
static class Bucket extends InternalSignificantTerms.Bucket {
static class Bucket extends InternalSignificantTerms.Bucket<Bucket> {
long term;
public Bucket(long subsetSize, long supersetSize, DocValueFormat format) {
super(subsetSize, supersetSize, format);
// for serialization
}
public Bucket(long subsetDf, long subsetSize, long supersetDf, long supersetSize, long term, InternalAggregations aggregations,
DocValueFormat format) {
super(subsetDf, subsetSize, supersetDf, supersetSize, aggregations, format);
this.term = term;
}
public Bucket(long subsetDf, long subsetSize, long supersetDf, long supersetSize, long term, InternalAggregations aggregations, double score) {
public Bucket(long subsetDf, long subsetSize, long supersetDf, long supersetSize, long term, InternalAggregations aggregations,
double score) {
this(subsetDf, subsetSize, supersetDf, supersetSize, term, aggregations, null);
this.score = score;
}
public Bucket(StreamInput in, long subsetSize, long supersetSize, DocValueFormat format) throws IOException {
super(in, subsetSize, supersetSize, format);
subsetDf = in.readVLong();
supersetDf = in.readVLong();
term = in.readLong();
score = in.readDouble();
aggregations = InternalAggregations.readAggregations(in);
}
@Override
public void writeTo(StreamOutput out) throws IOException {
out.writeVLong(subsetDf);
out.writeVLong(supersetDf);
out.writeLong(term);
out.writeDouble(getSignificanceScore());
aggregations.writeTo(out);
}
@Override
public Object getKey() {
return term;
@ -84,7 +82,7 @@ public class SignificantLongTerms extends InternalSignificantTerms<SignificantLo
@Override
public String getKeyAsString() {
return Long.toString(term);
return format.format(term);
}
@Override
@ -97,25 +95,6 @@ public class SignificantLongTerms extends InternalSignificantTerms<SignificantLo
return new Bucket(subsetDf, subsetSize, supersetDf, supersetSize, term, aggregations, format);
}
@Override
public void readFrom(StreamInput in) throws IOException {
subsetDf = in.readVLong();
supersetDf = in.readVLong();
term = in.readLong();
score = in.readDouble();
aggregations = InternalAggregations.readAggregations(in);
}
@Override
public void writeTo(StreamOutput out) throws IOException {
out.writeVLong(subsetDf);
out.writeVLong(supersetDf);
out.writeLong(term);
out.writeDouble(getSignificanceScore());
aggregations.writeTo(out);
}
@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject();
@ -132,25 +111,29 @@ public class SignificantLongTerms extends InternalSignificantTerms<SignificantLo
}
}
SignificantLongTerms() {
} // for serialization
public SignificantLongTerms(String name, int requiredSize, long minDocCount, List<PipelineAggregator> pipelineAggregators,
Map<String, Object> metaData, DocValueFormat format, long subsetSize, long supersetSize,
SignificanceHeuristic significanceHeuristic, List<Bucket> buckets) {
super(name, requiredSize, minDocCount, pipelineAggregators, metaData, format, subsetSize, supersetSize, significanceHeuristic,
buckets);
}
public SignificantLongTerms(long subsetSize, long supersetSize, String name, DocValueFormat format, int requiredSize,
long minDocCount, SignificanceHeuristic significanceHeuristic, List<? extends InternalSignificantTerms.Bucket> buckets,
List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData) {
super(subsetSize, supersetSize, name, format, requiredSize, minDocCount, significanceHeuristic, buckets, pipelineAggregators, metaData);
/**
* Read from a stream.
*/
public SignificantLongTerms(StreamInput in) throws IOException {
super(in, Bucket::new);
}
@Override
public Type type() {
return TYPE;
public String getWriteableName() {
return NAME;
}
@Override
public SignificantLongTerms create(List<SignificantLongTerms.Bucket> buckets) {
return new SignificantLongTerms(this.subsetSize, this.supersetSize, this.name, this.format, this.requiredSize, this.minDocCount,
this.significanceHeuristic, buckets, this.pipelineAggregators(), this.metaData);
return new SignificantLongTerms(name, requiredSize, minDocCount, pipelineAggregators(), metaData, format, subsetSize, supersetSize,
significanceHeuristic, buckets);
}
@Override
@ -160,59 +143,24 @@ public class SignificantLongTerms extends InternalSignificantTerms<SignificantLo
}
@Override
protected SignificantLongTerms create(long subsetSize, long supersetSize,
List<org.elasticsearch.search.aggregations.bucket.significant.InternalSignificantTerms.Bucket> buckets,
InternalSignificantTerms prototype) {
return new SignificantLongTerms(subsetSize, supersetSize, prototype.getName(), ((SignificantLongTerms) prototype).format,
prototype.requiredSize, prototype.minDocCount, prototype.significanceHeuristic, buckets, prototype.pipelineAggregators(),
prototype.getMetaData());
}
@Override
protected void doReadFrom(StreamInput in) throws IOException {
this.format = in.readNamedWriteable(DocValueFormat.class);
this.requiredSize = readSize(in);
this.minDocCount = in.readVLong();
this.subsetSize = in.readVLong();
this.supersetSize = in.readVLong();
significanceHeuristic = in.readNamedWriteable(SignificanceHeuristic.class);
int size = in.readVInt();
List<InternalSignificantTerms.Bucket> buckets = new ArrayList<>(size);
for (int i = 0; i < size; i++) {
Bucket bucket = new Bucket(subsetSize, supersetSize, format);
bucket.readFrom(in);
buckets.add(bucket);
}
this.buckets = buckets;
this.bucketMap = null;
}
@Override
protected void doWriteTo(StreamOutput out) throws IOException {
out.writeNamedWriteable(format);
writeSize(requiredSize, out);
out.writeVLong(minDocCount);
out.writeVLong(subsetSize);
out.writeVLong(supersetSize);
out.writeNamedWriteable(significanceHeuristic);
out.writeVInt(buckets.size());
for (InternalSignificantTerms.Bucket bucket : buckets) {
bucket.writeTo(out);
}
protected SignificantLongTerms create(long subsetSize, long supersetSize, List<Bucket> buckets) {
return new SignificantLongTerms(getName(), requiredSize, minDocCount, pipelineAggregators(), getMetaData(), format, subsetSize,
supersetSize, significanceHeuristic, buckets);
}
@Override
public XContentBuilder doXContentBody(XContentBuilder builder, Params params) throws IOException {
builder.field("doc_count", subsetSize);
builder.startArray(CommonFields.BUCKETS);
for (InternalSignificantTerms.Bucket bucket : buckets) {
for (Bucket bucket : buckets) {
bucket.toXContent(builder, params);
}
builder.endArray();
return builder;
}
@Override
protected Bucket[] createBucketsArray(int size) {
return new Bucket[size];
}
}

View File

@ -36,10 +36,11 @@ import org.elasticsearch.search.internal.ContextIndexSearcher;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import static java.util.Collections.emptyList;
/**
*
*/
@ -82,7 +83,7 @@ public class SignificantLongTermsAggregator extends LongTermsAggregator {
long supersetSize = termsAggFactory.getSupersetNumDocs();
long subsetSize = numCollectedDocs;
BucketSignificancePriorityQueue ordered = new BucketSignificancePriorityQueue(size);
BucketSignificancePriorityQueue<SignificantLongTerms.Bucket> ordered = new BucketSignificancePriorityQueue<>(size);
SignificantLongTerms.Bucket spare = null;
for (long i = 0; i < bucketOrds.size(); i++) {
final int docCount = bucketDocCount(i);
@ -102,18 +103,17 @@ public class SignificantLongTermsAggregator extends LongTermsAggregator {
spare.updateScore(significanceHeuristic);
spare.bucketOrd = i;
spare = (SignificantLongTerms.Bucket) ordered.insertWithOverflow(spare);
spare = ordered.insertWithOverflow(spare);
}
final InternalSignificantTerms.Bucket[] list = new InternalSignificantTerms.Bucket[ordered.size()];
final SignificantLongTerms.Bucket[] list = new SignificantLongTerms.Bucket[ordered.size()];
for (int i = ordered.size() - 1; i >= 0; i--) {
final SignificantLongTerms.Bucket bucket = (SignificantLongTerms.Bucket) ordered.pop();
final SignificantLongTerms.Bucket bucket = ordered.pop();
bucket.aggregations = bucketAggregations(bucket.bucketOrd);
list[i] = bucket;
}
return new SignificantLongTerms(subsetSize, supersetSize, name, format, bucketCountThresholds.getRequiredSize(),
bucketCountThresholds.getMinDocCount(), significanceHeuristic, Arrays.asList(list), pipelineAggregators(),
metaData());
return new SignificantLongTerms(name, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getMinDocCount(),
pipelineAggregators(), metaData(), format, subsetSize, supersetSize, significanceHeuristic, Arrays.asList(list));
}
@Override
@ -122,9 +122,8 @@ public class SignificantLongTermsAggregator extends LongTermsAggregator {
ContextIndexSearcher searcher = context.searchContext().searcher();
IndexReader topReader = searcher.getIndexReader();
int supersetSize = topReader.numDocs();
return new SignificantLongTerms(0, supersetSize, name, format, bucketCountThresholds.getRequiredSize(),
bucketCountThresholds.getMinDocCount(), significanceHeuristic,
Collections.<InternalSignificantTerms.Bucket> emptyList(), pipelineAggregators(), metaData());
return new SignificantLongTerms(name, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getMinDocCount(),
pipelineAggregators(), metaData(), format, 0, supersetSize, significanceHeuristic, emptyList());
}
@Override

View File

@ -23,56 +23,51 @@ import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.search.DocValueFormat;
import org.elasticsearch.search.aggregations.AggregationStreams;
import org.elasticsearch.search.aggregations.InternalAggregation;
import org.elasticsearch.search.aggregations.InternalAggregations;
import org.elasticsearch.search.aggregations.bucket.significant.heuristics.SignificanceHeuristic;
import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
/**
*
* Result of running the significant terms aggregation on a String field.
*/
public class SignificantStringTerms extends InternalSignificantTerms<SignificantStringTerms, SignificantStringTerms.Bucket> {
public class SignificantStringTerms extends InternalMappedSignificantTerms<SignificantStringTerms, SignificantStringTerms.Bucket> {
public static final String NAME = "sigsterms";
public static final InternalAggregation.Type TYPE = new Type("significant_terms", "sigsterms");
public static final AggregationStreams.Stream STREAM = new AggregationStreams.Stream() {
@Override
public SignificantStringTerms readResult(StreamInput in) throws IOException {
SignificantStringTerms buckets = new SignificantStringTerms();
buckets.readFrom(in);
return buckets;
}
};
public static void registerStream() {
AggregationStreams.registerStream(STREAM, TYPE.stream());
}
public static void registerStreams() {
AggregationStreams.registerStream(STREAM, TYPE.stream());
}
public static class Bucket extends InternalSignificantTerms.Bucket {
public static class Bucket extends InternalSignificantTerms.Bucket<Bucket> {
BytesRef termBytes;
public Bucket(long subsetSize, long supersetSize, DocValueFormat format) {
// for serialization
super(subsetSize, supersetSize, format);
}
public Bucket(BytesRef term, long subsetDf, long subsetSize, long supersetDf, long supersetSize, InternalAggregations aggregations,
DocValueFormat format) {
super(subsetDf, subsetSize, supersetDf, supersetSize, aggregations, format);
this.termBytes = term;
}
/**
* Read from a stream.
*
* @param in stream to read the bucket's term, document frequencies, score and sub-aggregations from
* @param subsetSize passed through to the superclass constructor
* @param supersetSize passed through to the superclass constructor
* @param format passed through to the superclass constructor
* @throws IOException if reading from the stream fails
*/
public Bucket(StreamInput in, long subsetSize, long supersetSize, DocValueFormat format) throws IOException {
super(in, subsetSize, supersetSize, format);
// Read order must mirror writeTo: term bytes, subsetDf, supersetDf, score, then sub-aggregations.
termBytes = in.readBytesRef();
subsetDf = in.readVLong();
supersetDf = in.readVLong();
score = in.readDouble();
aggregations = InternalAggregations.readAggregations(in);
}
/**
* Write this bucket to a stream: term bytes, subsetDf, supersetDf, significance score, then sub-aggregations.
* The stream-reading constructor consumes the fields in this same order.
*/
@Override
public void writeTo(StreamOutput out) throws IOException {
out.writeBytesRef(termBytes);
out.writeVLong(subsetDf);
out.writeVLong(supersetDf);
// The score is written via its getter; the stream-reading constructor stores it back into the score field.
out.writeDouble(getSignificanceScore());
aggregations.writeTo(out);
}
public Bucket(BytesRef term, long subsetDf, long subsetSize, long supersetDf, long supersetSize,
InternalAggregations aggregations, double score, DocValueFormat format) {
this(term, subsetDf, subsetSize, supersetDf, supersetSize, aggregations, format);
@ -105,24 +100,6 @@ public class SignificantStringTerms extends InternalSignificantTerms<Significant
return new Bucket(termBytes, subsetDf, subsetSize, supersetDf, supersetSize, aggregations, format);
}
@Override
public void readFrom(StreamInput in) throws IOException {
termBytes = in.readBytesRef();
subsetDf = in.readVLong();
supersetDf = in.readVLong();
score = in.readDouble();
aggregations = InternalAggregations.readAggregations(in);
}
@Override
public void writeTo(StreamOutput out) throws IOException {
out.writeBytesRef(termBytes);
out.writeVLong(subsetDf);
out.writeVLong(supersetDf);
out.writeDouble(getSignificanceScore());
aggregations.writeTo(out);
}
@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject();
@ -136,24 +113,29 @@ public class SignificantStringTerms extends InternalSignificantTerms<Significant
}
}
SignificantStringTerms() {} // for serialization
/**
* Build the result from already-collected buckets. Every argument is handed unchanged to the superclass constructor.
*/
public SignificantStringTerms(String name, int requiredSize, long minDocCount, List<PipelineAggregator> pipelineAggregators,
Map<String, Object> metaData, DocValueFormat format, long subsetSize, long supersetSize,
SignificanceHeuristic significanceHeuristic, List<Bucket> buckets) {
super(name, requiredSize, minDocCount, pipelineAggregators, metaData, format, subsetSize, supersetSize, significanceHeuristic,
buckets);
}
public SignificantStringTerms(long subsetSize, long supersetSize, String name, DocValueFormat format, int requiredSize,
long minDocCount, SignificanceHeuristic significanceHeuristic, List<? extends InternalSignificantTerms.Bucket> buckets,
List<PipelineAggregator> pipelineAggregators,
Map<String, Object> metaData) {
super(subsetSize, supersetSize, name, format, requiredSize, minDocCount, significanceHeuristic, buckets, pipelineAggregators, metaData);
/**
* Read from a stream.
*/
public SignificantStringTerms(StreamInput in) throws IOException {
super(in, Bucket::new);
}
@Override
public Type type() {
return TYPE;
public String getWriteableName() {
return NAME;
}
@Override
public SignificantStringTerms create(List<SignificantStringTerms.Bucket> buckets) {
return new SignificantStringTerms(this.subsetSize, this.supersetSize, this.name, this.format, this.requiredSize, this.minDocCount,
this.significanceHeuristic, buckets, this.pipelineAggregators(), this.metaData);
return new SignificantStringTerms(name, requiredSize, minDocCount, pipelineAggregators(), metaData, format, subsetSize,
supersetSize, significanceHeuristic, buckets);
}
@Override
@ -163,50 +145,16 @@ public class SignificantStringTerms extends InternalSignificantTerms<Significant
}
@Override
protected SignificantStringTerms create(long subsetSize, long supersetSize, List<InternalSignificantTerms.Bucket> buckets,
InternalSignificantTerms prototype) {
return new SignificantStringTerms(subsetSize, supersetSize, prototype.getName(), prototype.format, prototype.requiredSize,
prototype.minDocCount, prototype.significanceHeuristic, buckets, prototype.pipelineAggregators(), prototype.getMetaData());
}
@Override
protected void doReadFrom(StreamInput in) throws IOException {
this.format = in.readNamedWriteable(DocValueFormat.class);
this.requiredSize = readSize(in);
this.minDocCount = in.readVLong();
this.subsetSize = in.readVLong();
this.supersetSize = in.readVLong();
significanceHeuristic = in.readNamedWriteable(SignificanceHeuristic.class);
int size = in.readVInt();
List<InternalSignificantTerms.Bucket> buckets = new ArrayList<>(size);
for (int i = 0; i < size; i++) {
Bucket bucket = new Bucket(subsetSize, supersetSize, format);
bucket.readFrom(in);
buckets.add(bucket);
}
this.buckets = buckets;
this.bucketMap = null;
}
@Override
protected void doWriteTo(StreamOutput out) throws IOException {
out.writeNamedWriteable(format);
writeSize(requiredSize, out);
out.writeVLong(minDocCount);
out.writeVLong(subsetSize);
out.writeVLong(supersetSize);
out.writeNamedWriteable(significanceHeuristic);
out.writeVInt(buckets.size());
for (InternalSignificantTerms.Bucket bucket : buckets) {
bucket.writeTo(out);
}
protected SignificantStringTerms create(long subsetSize, long supersetSize, List<Bucket> buckets) {
// Only the sizes and the buckets come from the caller; name, thresholds, pipeline aggregators, metadata, format and
// significance heuristic are all taken from this instance.
return new SignificantStringTerms(getName(), requiredSize, minDocCount, pipelineAggregators(), getMetaData(), format, subsetSize,
supersetSize, significanceHeuristic, buckets);
}
@Override
public XContentBuilder doXContentBody(XContentBuilder builder, Params params) throws IOException {
builder.field("doc_count", subsetSize);
builder.startArray(CommonFields.BUCKETS);
for (InternalSignificantTerms.Bucket bucket : buckets) {
for (Bucket bucket : buckets) {
//There is a condition (presumably when only one shard has a bucket?) where reduce is not called
// and I end up with buckets that contravene the user's min_doc_count criteria in my reducer
if (bucket.subsetDf >= minDocCount) {
@ -217,4 +165,8 @@ public class SignificantStringTerms extends InternalSignificantTerms<Significant
return builder;
}
@Override
protected Bucket[] createBucketsArray(int size) {
// Allocates an array of this class's own Bucket type with the requested length; no elements are created.
return new Bucket[size];
}
}

View File

@ -37,10 +37,11 @@ import org.elasticsearch.search.internal.ContextIndexSearcher;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import static java.util.Collections.emptyList;
/**
* An aggregator of significant string values.
*/
@ -81,7 +82,7 @@ public class SignificantStringTermsAggregator extends StringTermsAggregator {
long supersetSize = termsAggFactory.getSupersetNumDocs();
long subsetSize = numCollectedDocs;
BucketSignificancePriorityQueue ordered = new BucketSignificancePriorityQueue(size);
BucketSignificancePriorityQueue<SignificantStringTerms.Bucket> ordered = new BucketSignificancePriorityQueue<>(size);
SignificantStringTerms.Bucket spare = null;
for (int i = 0; i < bucketOrds.size(); i++) {
final int docCount = bucketDocCount(i);
@ -105,21 +106,21 @@ public class SignificantStringTermsAggregator extends StringTermsAggregator {
spare.updateScore(significanceHeuristic);
spare.bucketOrd = i;
spare = (SignificantStringTerms.Bucket) ordered.insertWithOverflow(spare);
spare = ordered.insertWithOverflow(spare);
}
final InternalSignificantTerms.Bucket[] list = new InternalSignificantTerms.Bucket[ordered.size()];
final SignificantStringTerms.Bucket[] list = new SignificantStringTerms.Bucket[ordered.size()];
for (int i = ordered.size() - 1; i >= 0; i--) {
final SignificantStringTerms.Bucket bucket = (SignificantStringTerms.Bucket) ordered.pop();
final SignificantStringTerms.Bucket bucket = ordered.pop();
// the terms are owned by the BytesRefHash, we need to pull a copy since the BytesRef hash data may be recycled at some point
bucket.termBytes = BytesRef.deepCopyOf(bucket.termBytes);
bucket.aggregations = bucketAggregations(bucket.bucketOrd);
list[i] = bucket;
}
return new SignificantStringTerms(subsetSize, supersetSize, name, format, bucketCountThresholds.getRequiredSize(),
bucketCountThresholds.getMinDocCount(), significanceHeuristic, Arrays.asList(list), pipelineAggregators(),
metaData());
return new SignificantStringTerms( name, bucketCountThresholds.getRequiredSize(),
bucketCountThresholds.getMinDocCount(), pipelineAggregators(),
metaData(), format, subsetSize, supersetSize, significanceHeuristic, Arrays.asList(list));
}
@Override
@ -128,9 +129,8 @@ public class SignificantStringTermsAggregator extends StringTermsAggregator {
ContextIndexSearcher searcher = context.searchContext().searcher();
IndexReader topReader = searcher.getIndexReader();
int supersetSize = topReader.numDocs();
return new SignificantStringTerms(0, supersetSize, name, format, bucketCountThresholds.getRequiredSize(),
bucketCountThresholds.getMinDocCount(), significanceHeuristic,
Collections.<InternalSignificantTerms.Bucket> emptyList(), pipelineAggregators(), metaData());
return new SignificantStringTerms(name, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getMinDocCount(),
pipelineAggregators(), metaData(), format, 0, supersetSize, significanceHeuristic, emptyList());
}
@Override

View File

@ -18,6 +18,7 @@
*/
package org.elasticsearch.search.aggregations.bucket.significant;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.search.aggregations.InternalMultiBucketAggregation;
import org.elasticsearch.search.aggregations.bucket.MultiBucketsAggregation;
@ -27,8 +28,6 @@ import java.util.List;
* An aggregation that collects significant terms in comparison to a background set.
*/
public interface SignificantTerms extends MultiBucketsAggregation, Iterable<SignificantTerms.Bucket> {
abstract static class Bucket extends InternalMultiBucketAggregation.InternalBucket {
long subsetDf;
@ -36,18 +35,21 @@ public interface SignificantTerms extends MultiBucketsAggregation, Iterable<Sign
long supersetDf;
long supersetSize;
protected Bucket(long subsetSize, long supersetSize) {
// for serialization
Bucket(long subsetDf, long subsetSize, long supersetDf, long supersetSize) {
this.subsetSize = subsetSize;
this.supersetSize = supersetSize;
}
Bucket(long subsetDf, long subsetSize, long supersetDf, long supersetSize) {
this(subsetSize, supersetSize);
this.subsetDf = subsetDf;
this.supersetDf = supersetDf;
}
/**
* Read from a stream.
* <p>
* This constructor only records {@code subsetSize} and {@code supersetSize}; it does not itself read anything from
* {@code in}, and it leaves {@code subsetDf} and {@code supersetDf} unassigned.
*/
protected Bucket(StreamInput in, long subsetSize, long supersetSize) {
this.subsetSize = subsetSize;
this.supersetSize = supersetSize;
}
abstract int compareTerm(SignificantTerms.Bucket other);
public abstract double getSignificanceScore();

View File

@ -24,7 +24,9 @@ import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.search.aggregations.AggregatorFactories.Builder;
import org.elasticsearch.search.aggregations.InternalAggregation.Type;
import org.elasticsearch.search.aggregations.AggregatorFactory;
import org.elasticsearch.search.aggregations.InternalAggregation;
import org.elasticsearch.search.aggregations.bucket.significant.heuristics.JLHScore;
import org.elasticsearch.search.aggregations.bucket.significant.heuristics.SignificanceHeuristic;
import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregator;
@ -46,7 +48,8 @@ import java.util.Objects;
*
*/
public class SignificantTermsAggregationBuilder extends ValuesSourceAggregationBuilder<ValuesSource, SignificantTermsAggregationBuilder> {
public static final String NAME = SignificantStringTerms.TYPE.name();
public static final String NAME = "significant_terms";
public static final InternalAggregation.Type TYPE = new Type(NAME);
public static final ParseField AGGREGATION_NAME_FIELD = new ParseField(NAME);
static final ParseField BACKGROUND_FILTER = new ParseField("background_filter");
@ -63,14 +66,14 @@ public class SignificantTermsAggregationBuilder extends ValuesSourceAggregationB
private SignificanceHeuristic significanceHeuristic = DEFAULT_SIGNIFICANCE_HEURISTIC;
public SignificantTermsAggregationBuilder(String name, ValueType valueType) {
super(name, SignificantStringTerms.TYPE, ValuesSourceType.ANY, valueType);
super(name, TYPE, ValuesSourceType.ANY, valueType);
}
/**
* Read from a Stream.
*/
public SignificantTermsAggregationBuilder(StreamInput in) throws IOException {
super(in, SignificantStringTerms.TYPE, ValuesSourceType.ANY);
super(in, TYPE, ValuesSourceType.ANY);
bucketCountThresholds = new BucketCountThresholds(in);
executionHint = in.readOptionalString();
filterBuilder = in.readOptionalNamedWriteable(QueryBuilder.class);

View File

@ -18,69 +18,75 @@
*/
package org.elasticsearch.search.aggregations.bucket.significant;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.search.DocValueFormat;
import org.elasticsearch.search.aggregations.AggregationStreams;
import org.elasticsearch.search.aggregations.InternalAggregation;
import org.elasticsearch.search.aggregations.InternalAggregations;
import org.elasticsearch.search.aggregations.bucket.significant.heuristics.SignificanceHeuristic;
import org.elasticsearch.search.aggregations.bucket.terms.InternalTerms;
import org.elasticsearch.search.aggregations.bucket.terms.UnmappedTerms;
import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator;
import java.io.IOException;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import static java.util.Collections.emptyList;
/**
*
* Result of running the significant terms aggregation on an unmapped field.
*/
public class UnmappedSignificantTerms extends InternalSignificantTerms<UnmappedSignificantTerms, InternalSignificantTerms.Bucket> {
public class UnmappedSignificantTerms extends InternalSignificantTerms<UnmappedSignificantTerms, UnmappedSignificantTerms.Bucket> {
public static final String NAME = "umsigterms";
public static final Type TYPE = new Type("significant_terms", "umsigterms");
private static final List<Bucket> BUCKETS = Collections.emptyList();
private static final Map<String, Bucket> BUCKETS_MAP = Collections.emptyMap();
public static final AggregationStreams.Stream STREAM = new AggregationStreams.Stream() {
@Override
public UnmappedSignificantTerms readResult(StreamInput in) throws IOException {
UnmappedSignificantTerms buckets = new UnmappedSignificantTerms();
buckets.readFrom(in);
return buckets;
/**
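* Concrete type that can't be built because Java needs a concrete type so {@link InternalSignificantTerms.Bucket} can have a self type
* but {@linkplain UnmappedSignificantTerms} doesn't ever need to build it because it never returns any buckets. This mirrors the
* arrangement between {@link InternalTerms.Bucket} and {@linkplain UnmappedTerms}.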
*/
protected abstract static class Bucket extends InternalSignificantTerms.Bucket<Bucket> {
private Bucket(BytesRef term, long subsetDf, long subsetSize, long supersetDf, long supersetSize, InternalAggregations aggregations,
DocValueFormat format) {
super(subsetDf, subsetSize, supersetDf, supersetSize, aggregations, format);
}
};
public static void registerStreams() {
AggregationStreams.registerStream(STREAM, TYPE.stream());
}
UnmappedSignificantTerms() {} // for serialization
/**
* Build the result for an unmapped field. No format, subset/superset sizes, heuristic or buckets are supplied; only the
* name, thresholds, pipeline aggregators and metadata are passed to the superclass.
*/
public UnmappedSignificantTerms(String name, int requiredSize, long minDocCount, List<PipelineAggregator> pipelineAggregators,
Map<String, Object> metaData) {
super(name, requiredSize, minDocCount, pipelineAggregators, metaData);
}
public UnmappedSignificantTerms(String name, int requiredSize, long minDocCount, List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData) {
//We pass zero for index/subset sizes because for the purpose of significant term analysis
// we assume an unmapped index's size is irrelevant to the proceedings.
super(0, 0, name, DocValueFormat.RAW, requiredSize, minDocCount, SignificantTermsAggregationBuilder.DEFAULT_SIGNIFICANCE_HEURISTIC,
BUCKETS, pipelineAggregators, metaData);
/**
* Read from a stream.
*/
public UnmappedSignificantTerms(StreamInput in) throws IOException {
super(in);
}
@Override
public Type type() {
return TYPE;
protected void writeTermTypeInfoTo(StreamOutput out) throws IOException {
// Nothing to write
}
@Override
public UnmappedSignificantTerms create(List<InternalSignificantTerms.Bucket> buckets) {
return new UnmappedSignificantTerms(this.name, this.requiredSize, this.minDocCount, this.pipelineAggregators(), this.metaData);
public String getWriteableName() {
return NAME;
}
@Override
public InternalSignificantTerms.Bucket createBucket(InternalAggregations aggregations, InternalSignificantTerms.Bucket prototype) {
public UnmappedSignificantTerms create(List<Bucket> buckets) {
return new UnmappedSignificantTerms(name, requiredSize, minDocCount, pipelineAggregators(), metaData);
}
@Override
public Bucket createBucket(InternalAggregations aggregations, Bucket prototype) {
throw new UnsupportedOperationException("not supported for UnmappedSignificantTerms");
}
@Override
protected UnmappedSignificantTerms create(long subsetSize, long supersetSize, List<Bucket> buckets, InternalSignificantTerms prototype) {
protected UnmappedSignificantTerms create(long subsetSize, long supersetSize, List<Bucket> buckets) {
throw new UnsupportedOperationException("not supported for UnmappedSignificantTerms");
}
@ -94,24 +100,39 @@ public class UnmappedSignificantTerms extends InternalSignificantTerms<UnmappedS
return this;
}
@Override
protected void doReadFrom(StreamInput in) throws IOException {
this.requiredSize = readSize(in);
this.minDocCount = in.readVLong();
this.buckets = BUCKETS;
this.bucketMap = BUCKETS_MAP;
}
@Override
protected void doWriteTo(StreamOutput out) throws IOException {
writeSize(requiredSize, out);
out.writeVLong(minDocCount);
}
@Override
public XContentBuilder doXContentBody(XContentBuilder builder, Params params) throws IOException {
builder.startArray(CommonFields.BUCKETS).endArray();
return builder;
}
@Override
protected Bucket[] createBucketsArray(int size) {
// Allocates an array of this class's own Bucket type with the requested length; no elements are created.
return new Bucket[size];
}
@Override
protected List<Bucket> getBucketsInternal() {
// An unmapped field never produces buckets, so the list is always empty.
return emptyList();
}
@Override
public SignificantTerms.Bucket getBucketByKey(String term) {
// There are no buckets (getBucketsInternal returns an empty list), so every lookup misses.
return null;
}
@Override
protected SignificanceHeuristic getSignificanceHeuristic() {
// NOTE(review): presumably never reached because there are no buckets to score — confirm against the callers
// in InternalSignificantTerms.
throw new UnsupportedOperationException();
}
@Override
protected long getSubsetSize() {
// Always zero: an unmapped field contributes no documents to the subset.
return 0;
}
@Override
protected long getSupersetSize() {
// Always zero, matching getSubsetSize: an unmapped field contributes no documents to the superset either.
return 0;
}
}

View File

@ -63,6 +63,7 @@ public class UnmappedTerms extends InternalTerms<UnmappedTerms, UnmappedTerms.Bu
@Override
protected void writeTermTypeInfoTo(StreamOutput out) throws IOException {
// Nothing to write
}
@Override

View File

@ -41,18 +41,22 @@ import org.elasticsearch.test.ESIntegTestCase;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Locale;
import java.util.Set;
import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_REPLICAS;
import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_SHARDS;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchResponse;
import static org.elasticsearch.index.query.QueryBuilders.boolQuery;
import static org.elasticsearch.index.query.QueryBuilders.termQuery;
import static org.elasticsearch.search.aggregations.AggregationBuilders.significantTerms;
import static org.elasticsearch.search.aggregations.AggregationBuilders.terms;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchResponse;
import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.hasSize;
import static org.hamcrest.Matchers.is;
import static org.hamcrest.core.IsNull.notNullValue;
/**
*
@ -385,6 +389,27 @@ public class SignificantTermsIT extends ESIntegTestCase {
checkExpectedStringTermsFound(topTerms);
}
/**
 * Runs a significant terms aggregation with a {@code "0000"} key format over the {@code idx_unmapped} and {@code test}
 * indices and checks that buckets can be looked up by their formatted keys {@code 0001} through {@code 0003}.
 */
public void testPartiallyUnmappedWithFormat() throws Exception {
    SearchResponse searchResponse = client().prepareSearch("idx_unmapped", "test")
            .setSearchType(SearchType.QUERY_AND_FETCH)
            .setQuery(boolQuery().should(termQuery("_all", "the")).should(termQuery("_all", "terje")))
            .setFrom(0).setSize(60).setExplain(true)
            .addAggregation(significantTerms("mySignificantTerms")
                    .field("fact_category")
                    .executionHint(randomExecutionHint())
                    .minDocCount(1)
                    .format("0000"))
            .execute()
            .actionGet();
    assertSearchResponse(searchResponse);

    SignificantTerms significant = searchResponse.getAggregations().get("mySignificantTerms");
    for (int category = 1; category < 4; category++) {
        // Keys are expected zero-padded to four digits, as the "0000" format requests.
        String expectedKey = String.format(Locale.ROOT, "%04d", category);
        SignificantTerms.Bucket match = significant.getBucketByKey(expectedKey);
        assertThat(match, notNullValue());
        assertThat(match.getKeyAsString(), equalTo(expectedKey));
    }
}
private void checkExpectedStringTermsFound(SignificantTerms topTerms) {
HashMap<String,Bucket>topWords=new HashMap<>();

View File

@ -58,22 +58,23 @@ import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.function.BiFunction;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import static java.util.Collections.emptyList;
import static java.util.Collections.emptyMap;
import static java.util.Collections.singletonList;
import static org.elasticsearch.search.aggregations.AggregationBuilders.significantTerms;
import static org.elasticsearch.test.VersionUtils.randomVersion;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.greaterThan;
import static org.hamcrest.Matchers.greaterThanOrEqualTo;
import static org.hamcrest.Matchers.instanceOf;
import static org.hamcrest.Matchers.lessThan;
import static org.hamcrest.Matchers.lessThanOrEqualTo;
/**
*
*/
public class SignificanceHeuristicTests extends ESTestCase {
static class SignificantTermsTestSearchContext extends TestSearchContext {
@ -95,13 +96,13 @@ public class SignificanceHeuristicTests extends ESTestCase {
// test that stream output can actually be read - does not replace bwc test
public void testStreamResponse() throws Exception {
Version version = randomVersion(random());
InternalSignificantTerms[] sigTerms = getRandomSignificantTerms(getRandomSignificanceheuristic());
InternalMappedSignificantTerms<?, ?> sigTerms = getRandomSignificantTerms(getRandomSignificanceheuristic());
// write
ByteArrayOutputStream outBuffer = new ByteArrayOutputStream();
OutputStreamStreamOutput out = new OutputStreamStreamOutput(outBuffer);
out.setVersion(version);
sigTerms[0].writeTo(out);
out.writeNamedWriteable(sigTerms);
// read
ByteArrayInputStream inBuffer = new ByteArrayInputStream(outBuffer.toByteArray());
@ -110,11 +111,11 @@ public class SignificanceHeuristicTests extends ESTestCase {
new SearchModule(Settings.EMPTY, registry, false, emptyList()); // populates the registry through side effects
in = new NamedWriteableAwareStreamInput(in, registry);
in.setVersion(version);
sigTerms[1].readFrom(in);
InternalMappedSignificantTerms<?, ?> read = (InternalMappedSignificantTerms<?, ?>) in.readNamedWriteable(InternalAggregation.class);
assertTrue(sigTerms[1].significanceHeuristic.equals(sigTerms[0].significanceHeuristic));
InternalSignificantTerms.Bucket originalBucket = (InternalSignificantTerms.Bucket) sigTerms[0].buckets.get(0);
InternalSignificantTerms.Bucket streamedBucket = (InternalSignificantTerms.Bucket) sigTerms[1].buckets.get(0);
assertEquals(sigTerms.significanceHeuristic, read.significanceHeuristic);
SignificantTerms.Bucket originalBucket = sigTerms.getBuckets().get(0);
SignificantTerms.Bucket streamedBucket = read.getBuckets().get(0);
assertThat(originalBucket.getKeyAsString(), equalTo(streamedBucket.getKeyAsString()));
assertThat(originalBucket.getSupersetDf(), equalTo(streamedBucket.getSupersetDf()));
assertThat(originalBucket.getSubsetDf(), equalTo(streamedBucket.getSubsetDf()));
@ -122,22 +123,18 @@ public class SignificanceHeuristicTests extends ESTestCase {
assertThat(streamedBucket.getSupersetSize(), equalTo(20L));
}
InternalSignificantTerms[] getRandomSignificantTerms(SignificanceHeuristic heuristic) {
InternalSignificantTerms[] sTerms = new InternalSignificantTerms[2];
ArrayList<InternalSignificantTerms.Bucket> buckets = new ArrayList<>();
InternalMappedSignificantTerms<?, ?> getRandomSignificantTerms(SignificanceHeuristic heuristic) {
if (randomBoolean()) {
buckets.add(new SignificantLongTerms.Bucket(1, 2, 3, 4, 123, InternalAggregations.EMPTY, null));
sTerms[0] = new SignificantLongTerms(10, 20, "some_name", DocValueFormat.RAW, 1, 1, heuristic, buckets,
Collections.emptyList(), null);
sTerms[1] = new SignificantLongTerms();
SignificantLongTerms.Bucket bucket = new SignificantLongTerms.Bucket(1, 2, 3, 4, 123, InternalAggregations.EMPTY,
DocValueFormat.RAW);
return new SignificantLongTerms("some_name", 1, 1, emptyList(), null, DocValueFormat.RAW, 10, 20, heuristic,
singletonList(bucket));
} else {
BytesRef term = new BytesRef("someterm");
buckets.add(new SignificantStringTerms.Bucket(term, 1, 2, 3, 4, InternalAggregations.EMPTY, DocValueFormat.RAW));
sTerms[0] = new SignificantStringTerms(10, 20, "some_name", DocValueFormat.RAW, 1, 1, heuristic, buckets,
Collections.emptyList(), null);
sTerms[1] = new SignificantStringTerms();
SignificantStringTerms.Bucket bucket = new SignificantStringTerms.Bucket(new BytesRef("someterm"), 1, 2, 3, 4,
InternalAggregations.EMPTY, DocValueFormat.RAW);
return new SignificantStringTerms("some_name", 1, 1, emptyList(), null, DocValueFormat.RAW, 10, 20, heuristic,
singletonList(bucket));
}
return sTerms;
}
SignificanceHeuristic getRandomSignificanceheuristic() {
@ -165,37 +162,54 @@ public class SignificanceHeuristicTests extends ESTestCase {
// Create aggregations as they might come from three different shards and return as list.
private List<InternalAggregation> createInternalAggregations() {
String type = randomBoolean() ? "long" : "string";
SignificanceHeuristic significanceHeuristic = getRandomSignificanceheuristic();
TestAggFactory<?, ?> factory = randomBoolean() ? new StringTestAggFactory() : new LongTestAggFactory();
List<InternalAggregation> aggs = new ArrayList<>();
List<InternalSignificantTerms.Bucket> terms0Buckets = new ArrayList<>();
terms0Buckets.add(createBucket(type, 4, 4, 5, 10, 0));
aggs.add(createAggregation(type, significanceHeuristic, terms0Buckets, 4, 10));
List<InternalSignificantTerms.Bucket> terms1Buckets = new ArrayList<>();
terms0Buckets.add(createBucket(type, 4, 4, 5, 10, 1));
aggs.add(createAggregation(type, significanceHeuristic, terms1Buckets, 4, 10));
List<InternalSignificantTerms.Bucket> terms01Buckets = new ArrayList<>();
terms0Buckets.add(createBucket(type, 4, 8, 5, 10, 0));
terms0Buckets.add(createBucket(type, 4, 8, 5, 10, 1));
aggs.add(createAggregation(type, significanceHeuristic, terms01Buckets, 8, 10));
aggs.add(factory.createAggregation(significanceHeuristic, 4, 10, 1, (f, i) -> f.createBucket(4, 4, 5, 10, 0)));
aggs.add(factory.createAggregation(significanceHeuristic, 4, 10, 1, (f, i) -> f.createBucket(4, 4, 5, 10, 1)));
aggs.add(factory.createAggregation(significanceHeuristic, 8, 10, 2, (f, i) -> f.createBucket(4, 4, 5, 10, i)));
return aggs;
}
private InternalSignificantTerms createAggregation(String type, SignificanceHeuristic significanceHeuristic, List<InternalSignificantTerms.Bucket> buckets, long subsetSize, long supersetSize) {
if (type.equals("string")) {
return new SignificantStringTerms(subsetSize, supersetSize, "sig_terms", DocValueFormat.RAW, 2, -1, significanceHeuristic, buckets, new ArrayList<PipelineAggregator>(), new HashMap<String, Object>());
} else {
return new SignificantLongTerms(subsetSize, supersetSize, "sig_terms", DocValueFormat.RAW, 2, -1, significanceHeuristic, buckets, new ArrayList<PipelineAggregator>(), new HashMap<String, Object>());
/**
 * Builds test aggregations and buckets for one concrete flavor of significant terms result.
 */
private abstract class TestAggFactory<A extends InternalSignificantTerms<A, B>, B extends InternalSignificantTerms.Bucket<B>> {
    /**
     * Calls {@code bucketFactory} once per index in {@code [0, bucketCount)}, passing this factory and the index, and wraps
     * the resulting buckets in an aggregation.
     */
    final A createAggregation(SignificanceHeuristic significanceHeuristic, long subsetSize, long supersetSize, int bucketCount,
            BiFunction<TestAggFactory<?, B>, Integer, B> bucketFactory) {
        List<B> created = IntStream.range(0, bucketCount)
                .boxed()
                .map(ordinal -> bucketFactory.apply(this, ordinal))
                .collect(Collectors.toList());
        return createAggregation(significanceHeuristic, subsetSize, supersetSize, created);
    }

    /** Wraps the given buckets in an aggregation of type {@code A}. */
    abstract A createAggregation(SignificanceHeuristic significanceHeuristic, long subsetSize, long supersetSize, List<B> buckets);

    /** Creates a single bucket of type {@code B} identified by {@code label}. */
    abstract B createBucket(long subsetDF, long subsetSize, long supersetDF, long supersetSize, long label);
}
/**
 * Factory producing {@link SignificantStringTerms} results whose bucket terms are the decimal text of a numeric label.
 */
private class StringTestAggFactory extends TestAggFactory<SignificantStringTerms, SignificantStringTerms.Bucket> {
    @Override
    SignificantStringTerms createAggregation(SignificanceHeuristic significanceHeuristic, long subsetSize, long supersetSize,
            List<SignificantStringTerms.Bucket> buckets) {
        // Fixed test values: name "sig_terms", requiredSize 2, minDocCount -1, no pipeline aggregators, empty metadata.
        return new SignificantStringTerms("sig_terms", 2, -1, emptyList(), emptyMap(), DocValueFormat.RAW, subsetSize, supersetSize,
                significanceHeuristic, buckets);
    }

    @Override
    SignificantStringTerms.Bucket createBucket(long subsetDF, long subsetSize, long supersetDF, long supersetSize, long label) {
        // The numeric label is rendered as decimal text and its UTF-8 bytes become the bucket's term.
        byte[] utf8Label = Long.toString(label).getBytes(StandardCharsets.UTF_8);
        BytesRef term = new BytesRef(utf8Label);
        return new SignificantStringTerms.Bucket(term, subsetDF, subsetSize, supersetDF, supersetSize, InternalAggregations.EMPTY,
                DocValueFormat.RAW);
    }
}
private class LongTestAggFactory extends TestAggFactory<SignificantLongTerms, SignificantLongTerms.Bucket> {
/** Wraps the given buckets in a {@link SignificantLongTerms} named {@code "sig_terms"}. */
@Override
SignificantLongTerms createAggregation(SignificanceHeuristic significanceHeuristic, long subsetSize, long supersetSize,
        List<SignificantLongTerms.Bucket> buckets) {
    // Pass an empty pipeline-aggregator list via emptyList(), matching StringTestAggFactory, instead of allocating
    // a throwaway ArrayList.
    return new SignificantLongTerms("sig_terms", 2, -1, emptyList(), emptyMap(), DocValueFormat.RAW,
            subsetSize, supersetSize, significanceHeuristic, buckets);
}
private InternalSignificantTerms.Bucket createBucket(String type, long subsetDF, long subsetSize, long supersetDF, long supersetSize, long label) {
if (type.equals("string")) {
return new SignificantStringTerms.Bucket(new BytesRef(Long.toString(label).getBytes(StandardCharsets.UTF_8)), subsetDF, subsetSize, supersetDF, supersetSize, InternalAggregations.EMPTY, DocValueFormat.RAW);
} else {
return new SignificantLongTerms.Bucket(subsetDF, subsetSize, supersetDF, supersetSize, label, InternalAggregations.EMPTY, DocValueFormat.RAW);
/** Creates a {@link SignificantLongTerms.Bucket} from the given counts and {@code label}. */
@Override
SignificantLongTerms.Bucket createBucket(long subsetDF, long subsetSize, long supersetDF, long supersetSize, long label) {
    SignificantLongTerms.Bucket bucket = new SignificantLongTerms.Bucket(subsetDF, subsetSize, supersetDF, supersetSize, label,
            InternalAggregations.EMPTY, DocValueFormat.RAW);
    return bucket;
}
}
@ -214,14 +228,22 @@ public class SignificanceHeuristicTests extends ESTestCase {
// test mutual information with string
boolean includeNegatives = randomBoolean();
boolean backgroundIsSuperset = randomBoolean();
assertThat(parseFromString(heuristicParserMapper, searchContext, "\"mutual_information\":{\"include_negatives\": " + includeNegatives + ", \"background_is_superset\":" + backgroundIsSuperset + "}"), equalTo((SignificanceHeuristic) (new MutualInformation(includeNegatives, backgroundIsSuperset))));
assertThat(parseFromString(heuristicParserMapper, searchContext, "\"chi_square\":{\"include_negatives\": " + includeNegatives + ", \"background_is_superset\":" + backgroundIsSuperset + "}"), equalTo((SignificanceHeuristic) (new ChiSquare(includeNegatives, backgroundIsSuperset))));
String mutual = "\"mutual_information\":{\"include_negatives\": " + includeNegatives + ", \"background_is_superset\":"
+ backgroundIsSuperset + "}";
assertEquals(new MutualInformation(includeNegatives, backgroundIsSuperset),
parseFromString(heuristicParserMapper, searchContext, mutual));
String chiSquare = "\"chi_square\":{\"include_negatives\": " + includeNegatives + ", \"background_is_superset\":"
+ backgroundIsSuperset + "}";
assertEquals(new ChiSquare(includeNegatives, backgroundIsSuperset),
parseFromString(heuristicParserMapper, searchContext, chiSquare));
// test with builders
assertTrue(parseFromBuilder(heuristicParserMapper, searchContext, new JLHScore()) instanceof JLHScore);
assertTrue(parseFromBuilder(heuristicParserMapper, searchContext, new GND(backgroundIsSuperset)) instanceof GND);
assertThat(parseFromBuilder(heuristicParserMapper, searchContext, new MutualInformation(includeNegatives, backgroundIsSuperset)), equalTo((SignificanceHeuristic) new MutualInformation(includeNegatives, backgroundIsSuperset)));
assertThat(parseFromBuilder(heuristicParserMapper, searchContext, new ChiSquare(includeNegatives, backgroundIsSuperset)), equalTo((SignificanceHeuristic) new ChiSquare(includeNegatives, backgroundIsSuperset)));
assertThat(parseFromBuilder(heuristicParserMapper, searchContext, new JLHScore()), instanceOf(JLHScore.class));
assertThat(parseFromBuilder(heuristicParserMapper, searchContext, new GND(backgroundIsSuperset)), instanceOf(GND.class));
assertEquals(new MutualInformation(includeNegatives, backgroundIsSuperset),
parseFromBuilder(heuristicParserMapper, searchContext, new MutualInformation(includeNegatives, backgroundIsSuperset)));
assertEquals(new ChiSquare(includeNegatives, backgroundIsSuperset),
parseFromBuilder(heuristicParserMapper, searchContext, new ChiSquare(includeNegatives, backgroundIsSuperset)));
// test exceptions
String faultyHeuristicdefinition = "\"mutual_information\":{\"include_negatives\": false, \"some_unknown_field\": false}";
@ -246,7 +268,8 @@ public class SignificanceHeuristicTests extends ESTestCase {
IndicesQueriesRegistry registry = new IndicesQueriesRegistry();
try {
XContentParser stParser = JsonXContent.jsonXContent.createParser("{\"field\":\"text\", " + faultyHeuristicDefinition + ",\"min_doc_count\":200}");
XContentParser stParser = JsonXContent.jsonXContent.createParser(
"{\"field\":\"text\", " + faultyHeuristicDefinition + ",\"min_doc_count\":200}");
QueryParseContext parseContext = new QueryParseContext(registry, stParser, ParseFieldMatcher.STRICT);
stParser.nextToken();
new SignificantTermsParser(significanceHeuristicParserRegistry, registry).parse("testagg", parseContext);
@ -283,7 +306,8 @@ public class SignificanceHeuristicTests extends ESTestCase {
/**
 * Parses a significance heuristic from a JSON fragment.
 * <p>
 * The fragment is embedded between a {@code "field":"text"} entry and a {@code "min_doc_count":200} entry to form a
 * complete JSON object, a JSON parser is created over that text, and the parser is handed to
 * {@code parseSignificanceHeuristic}.
 *
 * @param significanceHeuristicParserRegistry registry forwarded to {@code parseSignificanceHeuristic}
 * @param searchContext search context forwarded to {@code parseSignificanceHeuristic}
 * @param heuristicString the heuristic definition, written as one JSON field (name and value) without surrounding braces
 * @return whatever {@code parseSignificanceHeuristic} returns for the assembled document
 * @throws IOException declared by this method; presumably raised when the parser cannot be created or read — confirm
 */
protected SignificanceHeuristic parseFromString(ParseFieldRegistry<SignificanceHeuristicParser> significanceHeuristicParserRegistry,
SearchContext searchContext, String heuristicString) throws IOException {
// NOTE(review): the one-line declaration and the wrapped declaration below are the same statement; they look like
// the before/after sides of a diff hunk. Confirm that only one of them belongs in the real file.
XContentParser stParser = JsonXContent.jsonXContent.createParser("{\"field\":\"text\", " + heuristicString + ", \"min_doc_count\":200}");
XContentParser stParser = JsonXContent.jsonXContent.createParser(
"{\"field\":\"text\", " + heuristicString + ", \"min_doc_count\":200}");
return parseSignificanceHeuristic(significanceHeuristicParserRegistry, searchContext, stParser);
}