[7.x] Add Student's t-test aggregation support (#54469) (#54737)

Adds a t_test metric aggregation that can perform paired and unpaired two-sample
t-tests. Support for filters in the unpaired case is still missing from this PR;
it will be added in a follow-up PR.

Relates to #53692
Igor Motov 2020-04-06 11:36:47 -04:00 committed by GitHub
parent 0049e9467b
commit 2794572a35
28 changed files with 2465 additions and 19 deletions


@@ -535,6 +535,41 @@ for (int i = 0; i < 100; i++) {
{"load_time": "$value"}"""
}
// Used by t_test aggregations
buildRestTests.setups['node_upgrade'] = '''
- do:
indices.create:
index: node_upgrade
body:
settings:
number_of_shards: 1
number_of_replicas: 1
mappings:
properties:
name:
type: keyword
startup_time_before:
type: long
startup_time_after:
type: long
- do:
bulk:
index: node_upgrade
refresh: true
body: |
{"index":{}}
{"name": "A", "startup_time_before": 102, "startup_time_after": 89}
{"index":{}}
{"name": "B", "startup_time_before": 99, "startup_time_after": 93}
{"index":{}}
{"name": "C", "startup_time_before": 111, "startup_time_after": 72}
{"index":{}}
{"name": "D", "startup_time_before": 97, "startup_time_after": 98}
{"index":{}}
{"name": "E", "startup_time_before": 101, "startup_time_after": 102}
{"index":{}}
{"name": "F", "startup_time_before": 99, "startup_time_after": 98}'''
// Used by iprange agg
buildRestTests.setups['iprange'] = '''
- do:


@@ -49,7 +49,7 @@ include::metrics/median-absolute-deviation-aggregation.asciidoc[]
include::metrics/boxplot-aggregation.asciidoc[]
include::metrics/t-test-aggregation.asciidoc[]


@@ -0,0 +1,114 @@
[role="xpack"]
[testenv="basic"]
[[search-aggregations-metrics-ttest-aggregation]]
=== TTest Aggregation
A `t_test` metrics aggregation that performs a statistical hypothesis test in which the test statistic follows a Student's t-distribution
under the null hypothesis on numeric values extracted from the aggregated documents or generated by provided scripts. In practice, this
will tell you whether the difference between two population means is statistically significant and did not occur by chance alone.
==== Syntax
A `t_test` aggregation looks like this in isolation:
[source,js]
--------------------------------------------------
{
"t_test": {
"a": "value_before",
"b": "value_after",
"type": "paired"
}
}
--------------------------------------------------
// NOTCONSOLE
Assuming that we have a record of node startup times before and after an upgrade, let's look at a t-test to see whether the upgrade affected
the node startup time in a meaningful way.
[source,console]
--------------------------------------------------
GET node_upgrade/_search
{
"size": 0,
"aggs" : {
"startup_time_ttest" : {
"t_test" : {
"a" : {"field": "startup_time_before"}, <1>
"b" : {"field": "startup_time_after"}, <2>
"type": "paired" <3>
}
}
}
}
--------------------------------------------------
// TEST[setup:node_upgrade]
<1> The field `startup_time_before` must be a numeric field
<2> The field `startup_time_after` must be a numeric field
<3> Since we have data from the same nodes, we use the paired t-test.
The response returns the p-value, or probability value, for the test. It is the probability of obtaining results at least as extreme as
the result processed by the aggregation, assuming that the null hypothesis is correct (which means there is no difference between the
population means). A smaller p-value means the null hypothesis is more likely to be incorrect and the population means are indeed different.
[source,console-result]
--------------------------------------------------
{
...
"aggregations": {
"startup_time_ttest": {
"value": 0.1914368843365979 <1>
}
}
}
--------------------------------------------------
// TESTRESPONSE[s/\.\.\./"took": $body.took,"timed_out": false,"_shards": $body._shards,"hits": $body.hits,/]
<1> The p-value.
==== T-Test Types
The `t_test` aggregation supports unpaired and paired two-sample t-tests. The type of the test can be specified using the `type` parameter:
`"type": "paired"`:: performs a paired t-test
`"type": "homoscedastic"`:: performs a two-sample equal variance t-test
`"type": "heteroscedastic"`:: performs a two-sample unequal variance t-test (this is the default)
==== Script
The `t_test` metric supports scripting. For example, if we need to adjust our startup times for the before values, we could use
a script to recalculate them on the fly:
[source,console]
--------------------------------------------------
GET node_upgrade/_search
{
"size": 0,
"aggs" : {
"startup_time_ttest" : {
"t_test" : {
"a": {
"script" : {
"lang": "painless",
"source": "doc['startup_time_before'].value - params.adjustment", <1>
"params" : {
"adjustment" : 10 <2>
}
}
},
"b": {
"field": "startup_time_after" <3>
},
"type": "paired"
}
}
}
}
--------------------------------------------------
// TEST[setup:node_upgrade]
<1> The `field` parameter is replaced with a `script` parameter, which uses the
script to generate values on which the t-test is calculated
<2> Scripting supports parameterized input just like any other script
<3> We can mix scripts and fields


@@ -18,6 +18,8 @@ dependencies {
compileOnly project(path: xpackModule('core'), configuration: 'default')
testCompile project(path: xpackModule('core'), configuration: 'testArtifacts')
compile 'org.apache.commons:commons-math3:3.2'
}
integTest.enabled = false


@@ -0,0 +1 @@
ec2544ab27e110d2d431bdad7d538ed509b21e62


@@ -0,0 +1,475 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
Some code in core/src/java/org/apache/lucene/util/UnicodeUtil.java was
derived from unicode conversion examples available at
http://www.unicode.org/Public/PROGRAMS/CVTUTF. Here is the copyright
from those sources:
/*
* Copyright 2001-2004 Unicode, Inc.
*
* Disclaimer
*
* This source code is provided as is by Unicode, Inc. No claims are
* made as to fitness for any particular purpose. No warranties of any
* kind are expressed or implied. The recipient agrees to determine
* applicability of information provided. If this file has been
* purchased on magnetic or optical media from Unicode, Inc., the
* sole remedy for any claim will be exchange of defective media
* within 90 days of receipt.
*
* Limitations on Rights to Redistribute This Code
*
* Unicode, Inc. hereby grants the right to freely use the information
* supplied in this file in the creation of products supporting the
* Unicode Standard, and to make copies of this file in any form
* for internal or external distribution as long as this notice
* remains attached.
*/
Some code in core/src/java/org/apache/lucene/util/ArrayUtil.java was
derived from Python 2.4.2 sources available at
http://www.python.org. Full license is here:
http://www.python.org/download/releases/2.4.2/license/
Some code in core/src/java/org/apache/lucene/util/UnicodeUtil.java was
derived from Python 3.1.2 sources available at
http://www.python.org. Full license is here:
http://www.python.org/download/releases/3.1.2/license/
Some code in core/src/java/org/apache/lucene/util/automaton was
derived from Brics automaton sources available at
www.brics.dk/automaton/. Here is the copyright from those sources:
/*
* Copyright (c) 2001-2009 Anders Moeller
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
The levenshtein automata tables in core/src/java/org/apache/lucene/util/automaton
were automatically generated with the moman/finenight FSA package.
Here is the copyright for those sources:
# Copyright (c) 2010, Jean-Philippe Barrette-LaPierre, <jpb@rrette.com>
#
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation
# files (the "Software"), to deal in the Software without
# restriction, including without limitation the rights to use,
# copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following
# conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.
Some code in core/src/java/org/apache/lucene/util/UnicodeUtil.java was
derived from ICU (http://www.icu-project.org)
The full license is available here:
http://source.icu-project.org/repos/icu/icu/trunk/license.html
/*
* Copyright (C) 1999-2010, International Business Machines
* Corporation and others. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, and/or sell copies of the
* Software, and to permit persons to whom the Software is furnished to do so,
* provided that the above copyright notice(s) and this permission notice appear
* in all copies of the Software and that both the above copyright notice(s) and
* this permission notice appear in supporting documentation.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.
* IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE BE
* LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR
* ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
* IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
* OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*
* Except as contained in this notice, the name of a copyright holder shall not
* be used in advertising or otherwise to promote the sale, use or other
* dealings in this Software without prior written authorization of the
* copyright holder.
*/
The following license applies to the Snowball stemmers:
Copyright (c) 2001, Dr Martin Porter
Copyright (c) 2002, Richard Boulton
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* Neither the name of the copyright holders nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
The following license applies to the KStemmer:
Copyright © 2003,
Center for Intelligent Information Retrieval,
University of Massachusetts, Amherst.
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
3. The names "Center for Intelligent Information Retrieval" and
"University of Massachusetts" must not be used to endorse or promote products
derived from this software without prior written permission. To obtain
permission, contact info@ciir.cs.umass.edu.
THIS SOFTWARE IS PROVIDED BY UNIVERSITY OF MASSACHUSETTS AND OTHER CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
SUCH DAMAGE.
The following license applies to the Morfologik project:
Copyright (c) 2006 Dawid Weiss
Copyright (c) 2007-2011 Dawid Weiss, Marcin Miłkowski
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of Morfologik nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
---
The dictionary comes from Morfologik project. Morfologik uses data from
Polish ispell/myspell dictionary hosted at http://www.sjp.pl/slownik/en/ and
is licenced on the terms of (inter alia) LGPL and Creative Commons
ShareAlike. The part-of-speech tags were added in Morfologik project and
are not found in the data from sjp.pl. The tagset is similar to IPI PAN
tagset.
---
The following license applies to the Morfeusz project,
used by org.apache.lucene.analysis.morfologik.
BSD-licensed dictionary of Polish (SGJP)
http://sgjp.pl/morfeusz/
Copyright © 2011 Zygmunt Saloni, Włodzimierz Gruszczyński,
Marcin Woliński, Robert Wołosz
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the
distribution.
THIS SOFTWARE IS PROVIDED BY COPYRIGHT HOLDERS “AS IS” AND ANY EXPRESS
OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL COPYRIGHT HOLDERS OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


@@ -0,0 +1,9 @@
Apache Commons Math
Copyright 2001-2020 The Apache Software Foundation
This product includes software developed at
The Apache Software Foundation (http://www.apache.org/).
This product includes software developed for Orekit by
CS Systèmes d'Information (http://www.c-s.fr/)
Copyright 2010-2012 CS Systèmes d'Information


@@ -39,6 +39,11 @@ import org.elasticsearch.xpack.analytics.stringstats.StringStatsAggregationBuild
import org.elasticsearch.xpack.analytics.topmetrics.InternalTopMetrics;
import org.elasticsearch.xpack.analytics.topmetrics.TopMetricsAggregationBuilder;
import org.elasticsearch.xpack.analytics.topmetrics.TopMetricsAggregatorFactory;
import org.elasticsearch.xpack.analytics.ttest.InternalTTest;
import org.elasticsearch.xpack.analytics.ttest.PairedTTestState;
import org.elasticsearch.xpack.analytics.ttest.TTestAggregationBuilder;
import org.elasticsearch.xpack.analytics.ttest.TTestState;
import org.elasticsearch.xpack.analytics.ttest.UnpairedTTestState;
import org.elasticsearch.xpack.core.XPackField;
import org.elasticsearch.xpack.core.XPackPlugin;
import org.elasticsearch.xpack.core.analytics.action.AnalyticsStatsAction;
@@ -91,7 +96,12 @@ public class AnalyticsPlugin extends Plugin implements SearchPlugin, ActionPlugi
TopMetricsAggregationBuilder.NAME,
TopMetricsAggregationBuilder::new,
usage.track(AnalyticsUsage.Item.TOP_METRICS, checkLicense(TopMetricsAggregationBuilder.PARSER)))
.addResultReader(InternalTopMetrics::new)
.addResultReader(InternalTopMetrics::new),
new AggregationSpec(
TTestAggregationBuilder.NAME,
TTestAggregationBuilder::new,
usage.track(AnalyticsUsage.Item.T_TEST, checkLicense(TTestAggregationBuilder.PARSER)))
.addResultReader(InternalTTest::new)
);
}
@@ -131,6 +141,14 @@ public class AnalyticsPlugin extends Plugin implements SearchPlugin, ActionPlugi
return singletonList(new AnalyticsUsage());
}
@Override
public List<NamedWriteableRegistry.Entry> getNamedWriteables() {
return Arrays.asList(
new NamedWriteableRegistry.Entry(TTestState.class, PairedTTestState.NAME, PairedTTestState::new),
new NamedWriteableRegistry.Entry(TTestState.class, UnpairedTTestState.NAME, UnpairedTTestState::new)
);
}
private static <T> ContextParser<String, T> checkLicense(ContextParser<String, T> realParser) {
return (parser, name) -> {
if (getLicenseState().isAnalyticsAllowed() == false) {


@@ -25,7 +25,8 @@ public class AnalyticsUsage {
BOXPLOT,
CUMULATIVE_CARDINALITY,
STRING_STATS,
TOP_METRICS;
TOP_METRICS,
T_TEST;
}
private final Map<Item, AtomicLong> trackers = new EnumMap<>(Item.class);
@@ -54,6 +55,7 @@
trackers.get(Item.BOXPLOT).get(),
trackers.get(Item.CUMULATIVE_CARDINALITY).get(),
trackers.get(Item.STRING_STATS).get(),
trackers.get(Item.TOP_METRICS).get());
trackers.get(Item.TOP_METRICS).get(),
trackers.get(Item.T_TEST).get());
}
}


@@ -0,0 +1,99 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.analytics.ttest;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.search.DocValueFormat;
import org.elasticsearch.search.aggregations.InternalAggregation;
import org.elasticsearch.search.aggregations.metrics.InternalNumericMetricsAggregation;
import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.Objects;
public class InternalTTest extends InternalNumericMetricsAggregation.SingleValue implements TTest {
protected final TTestState state;
InternalTTest(String name, TTestState state, DocValueFormat formatter, Map<String, Object> metadata) {
super(name, metadata);
this.state = state;
this.format = formatter;
}
/**
* Read from a stream.
*/
public InternalTTest(StreamInput in) throws IOException {
super(in);
format = in.readNamedWriteable(DocValueFormat.class);
state = in.readNamedWriteable(TTestState.class);
}
@Override
protected void doWriteTo(StreamOutput out) throws IOException {
out.writeNamedWriteable(format);
out.writeNamedWriteable(state);
}
@Override
public String getWriteableName() {
return TTestAggregationBuilder.NAME;
}
// for testing only
DocValueFormat format() {
return format;
}
@Override
public InternalTTest reduce(List<InternalAggregation> aggregations, ReduceContext reduceContext) {
TTestState reduced = state.reduce(aggregations.stream().map(a -> ((InternalTTest) a).state));
return new InternalTTest(name, reduced, format, getMetadata());
}
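// NaN means there were not enough observations to compute the test; it is
// rendered as an explicit null value, and only real values additionally get a
// formatted string representation.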
@Override
public XContentBuilder doXContentBody(XContentBuilder builder, Params params) throws IOException {
double value = state.getValue();
boolean hasValue = Double.isNaN(value) == false;
builder.field(CommonFields.VALUE.getPreferredName(), hasValue ? value : null);
if (hasValue && format != DocValueFormat.RAW) {
builder.field(CommonFields.VALUE_AS_STRING.getPreferredName(), format.format(value).toString());
}
return builder;
}
@Override
public int hashCode() {
return Objects.hash(super.hashCode(), state);
}
@Override
public boolean equals(Object obj) {
if (this == obj) return true;
if (obj == null || getClass() != obj.getClass()) return false;
if (super.equals(obj) == false) return false;
InternalTTest that = (InternalTTest) obj;
return Objects.equals(state, that.state);
}
@Override
public double value() {
return state.getValue();
}
@Override
public double getValue() {
return state.getValue();
}
}


@@ -0,0 +1,91 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.analytics.ttest;
import org.apache.lucene.index.LeafReaderContext;
import org.elasticsearch.common.lease.Releasables;
import org.elasticsearch.common.util.BigArrays;
import org.elasticsearch.index.fielddata.SortedNumericDoubleValues;
import org.elasticsearch.search.DocValueFormat;
import org.elasticsearch.search.aggregations.AggregationExecutionException;
import org.elasticsearch.search.aggregations.Aggregator;
import org.elasticsearch.search.aggregations.LeafBucketCollector;
import org.elasticsearch.search.aggregations.LeafBucketCollectorBase;
import org.elasticsearch.search.aggregations.metrics.CompensatedSum;
import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator;
import org.elasticsearch.search.aggregations.support.MultiValuesSource;
import org.elasticsearch.search.internal.SearchContext;
import java.io.IOException;
import java.util.List;
import java.util.Map;
import static org.elasticsearch.xpack.analytics.ttest.TTestAggregationBuilder.A_FIELD;
import static org.elasticsearch.xpack.analytics.ttest.TTestAggregationBuilder.B_FIELD;
public class PairedTTestAggregator extends TTestAggregator<PairedTTestState> {
private TTestStatsBuilder statsBuilder;
PairedTTestAggregator(String name, MultiValuesSource.NumericMultiValuesSource valuesSources, int tails, DocValueFormat format,
SearchContext context, Aggregator parent, List<PipelineAggregator> pipelineAggregators,
Map<String, Object> metadata) throws IOException {
super(name, valuesSources, tails, format, context, parent, pipelineAggregators, metadata);
statsBuilder = new TTestStatsBuilder(context.bigArrays());
}
@Override
protected PairedTTestState getState(long bucket) {
return new PairedTTestState(statsBuilder.get(bucket), tails);
}
@Override
protected PairedTTestState getEmptyState() {
return new PairedTTestState(new TTestStats(0, 0, 0), tails);
}
@Override
protected long size() {
return statsBuilder.getSize();
}
@Override
public LeafBucketCollector getLeafCollector(LeafReaderContext ctx,
final LeafBucketCollector sub) throws IOException {
if (valuesSources == null) {
return LeafBucketCollector.NO_OP_COLLECTOR;
}
final BigArrays bigArrays = context.bigArrays();
final SortedNumericDoubleValues docAValues = valuesSources.getField(A_FIELD.getPreferredName(), ctx);
final SortedNumericDoubleValues docBValues = valuesSources.getField(B_FIELD.getPreferredName(), ctx);
final CompensatedSum compDiffSum = new CompensatedSum(0, 0);
final CompensatedSum compDiffSumOfSqr = new CompensatedSum(0, 0);
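// The paired test needs both values on the same document: documents missing
// either field are skipped, and multi-valued fields are rejected outright.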
return new LeafBucketCollectorBase(sub, docAValues) {
@Override
public void collect(int doc, long bucket) throws IOException {
if (docAValues.advanceExact(doc) && docBValues.advanceExact(doc)) {
if (docAValues.docValueCount() > 1 || docBValues.docValueCount() > 1) {
throw new AggregationExecutionException("Encountered more than one value for a " +
"single document. Use a script to combine multiple values per doc into a single value.");
}
statsBuilder.grow(bigArrays, bucket + 1);
// There should always be one value if advanceExact lands us here, either
// a real value or a `missing` value
assert docAValues.docValueCount() == 1;
assert docBValues.docValueCount() == 1;
double diff = docAValues.nextValue() - docBValues.nextValue();
statsBuilder.addValue(compDiffSum, compDiffSumOfSqr, bucket, diff);
}
}
};
}
@Override
public void doClose() {
Releasables.close(statsBuilder);
}
}


@@ -0,0 +1,91 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.analytics.ttest;
import org.apache.commons.math3.distribution.TDistribution;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import java.io.IOException;
import java.util.Objects;
import java.util.stream.Stream;
public class PairedTTestState implements TTestState {
public static final String NAME = "P";
private final TTestStats stats;
private final int tails;
public PairedTTestState(TTestStats stats, int tails) {
this.stats = stats;
this.tails = tails;
}
public PairedTTestState(StreamInput in) throws IOException {
stats = new TTestStats(in);
tails = in.readVInt();
}
@Override
public void writeTo(StreamOutput out) throws IOException {
stats.writeTo(out);
out.writeVInt(tails);
}
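// The paired t-test is a one-sample test on the per-document differences:
// t = mean(diff) / stdErr(diff) with count - 1 degrees of freedom; the value
// returned below is the one- or two-tailed p-value for that statistic.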
@Override
public double getValue() {
if (stats.count < 2) {
return Double.NaN;
}
long n = stats.count - 1;
double meanDiff = stats.sum / stats.count;
double variance = (stats.sumOfSqrs - ((stats.sum * stats.sum) / stats.count)) / stats.count;
if (variance <= 0.0) {
return meanDiff == 0.0 ? Double.NaN : 0.0;
}
double stdDiv = Math.sqrt(variance);
double stdErr = stdDiv / Math.sqrt(n);
double t = Math.abs(meanDiff / stdErr);
TDistribution dist = new TDistribution(n);
return dist.cumulativeProbability(-t) * tails;
}
@Override
public TTestState reduce(Stream<TTestState> states) {
TTestStats.Reducer reducer = new TTestStats.Reducer();
states.forEach(tTestState -> {
PairedTTestState state = (PairedTTestState) tTestState;
reducer.accept(state.stats);
if (state.tails != tails) {
throw new IllegalStateException("Incompatible tails value in the reduce. Expected "
+ state.tails + " reduced with " + tails);
}
});
return new PairedTTestState(reducer.result(), tails);
}
@Override
public String getWriteableName() {
return NAME;
}
@Override
public boolean equals(Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
PairedTTestState that = (PairedTTestState) o;
return tails == that.tails &&
stats.equals(that.stats);
}
@Override
public int hashCode() {
return Objects.hash(stats, tails);
}
}


@@ -0,0 +1,15 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.analytics.ttest;
import org.elasticsearch.search.aggregations.metrics.NumericMetricsAggregation;
public interface TTest extends NumericMetricsAggregation.SingleValue {
double getValue();
}


@@ -0,0 +1,135 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.analytics.ttest;
import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.xcontent.ObjectParser;
import org.elasticsearch.common.xcontent.ToXContent;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.index.query.QueryShardContext;
import org.elasticsearch.search.DocValueFormat;
import org.elasticsearch.search.aggregations.AggregationBuilder;
import org.elasticsearch.search.aggregations.AggregatorFactories;
import org.elasticsearch.search.aggregations.AggregatorFactory;
import org.elasticsearch.search.aggregations.support.MultiValuesSourceAggregationBuilder;
import org.elasticsearch.search.aggregations.support.MultiValuesSourceAggregatorFactory;
import org.elasticsearch.search.aggregations.support.MultiValuesSourceFieldConfig;
import org.elasticsearch.search.aggregations.support.MultiValuesSourceParseHelper;
import org.elasticsearch.search.aggregations.support.ValueType;
import org.elasticsearch.search.aggregations.support.ValuesSource;
import org.elasticsearch.search.aggregations.support.ValuesSourceConfig;
import java.io.IOException;
import java.util.Map;
import java.util.Objects;
public class TTestAggregationBuilder extends MultiValuesSourceAggregationBuilder.LeafOnly<ValuesSource.Numeric, TTestAggregationBuilder> {
public static final String NAME = "t_test";
public static final ParseField A_FIELD = new ParseField("a");
public static final ParseField B_FIELD = new ParseField("b");
public static final ParseField TYPE_FIELD = new ParseField("type");
public static final ParseField TAILS_FIELD = new ParseField("tails");
public static final ObjectParser<TTestAggregationBuilder, String> PARSER =
ObjectParser.fromBuilder(NAME, TTestAggregationBuilder::new);
static {
MultiValuesSourceParseHelper.declareCommon(PARSER, true, ValueType.NUMERIC);
MultiValuesSourceParseHelper.declareField(A_FIELD.getPreferredName(), PARSER, true, false);
MultiValuesSourceParseHelper.declareField(B_FIELD.getPreferredName(), PARSER, true, false);
PARSER.declareString(TTestAggregationBuilder::testType, TYPE_FIELD);
PARSER.declareInt(TTestAggregationBuilder::tails, TAILS_FIELD);
}
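// Defaults: Welch's (unequal variance) t-test with a two-tailed p-value.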
private TTestType testType = TTestType.HETEROSCEDASTIC;
private int tails = 2;
public TTestAggregationBuilder(String name) {
super(name, ValueType.NUMERIC);
}
public TTestAggregationBuilder(TTestAggregationBuilder clone,
AggregatorFactories.Builder factoriesBuilder,
Map<String, Object> metadata) {
super(clone, factoriesBuilder, metadata);
}
public TTestAggregationBuilder a(MultiValuesSourceFieldConfig valueConfig) {
field(A_FIELD.getPreferredName(), Objects.requireNonNull(valueConfig, "Configuration for field [" + A_FIELD + "] cannot be null"));
return this;
}
public TTestAggregationBuilder b(MultiValuesSourceFieldConfig weightConfig) {
field(B_FIELD.getPreferredName(), Objects.requireNonNull(weightConfig, "Configuration for field [" + B_FIELD + "] cannot be null"));
return this;
}
public TTestAggregationBuilder testType(String testType) {
return testType(TTestType.resolve(Objects.requireNonNull(testType, "Test type cannot be null")));
}
public TTestAggregationBuilder testType(TTestType testType) {
this.testType = Objects.requireNonNull(testType, "Test type cannot be null");
return this;
}
public TTestAggregationBuilder tails(int tails) {
if (tails < 1 || tails > 2) {
throw new IllegalArgumentException(
"[tails] must be 1 or 2. Found [" + tails + "] in [" + name + "]");
}
this.tails = tails;
return this;
}
public TTestAggregationBuilder(StreamInput in) throws IOException {
super(in, ValueType.NUMERIC);
testType = in.readEnum(TTestType.class);
tails = in.readVInt();
}
@Override
protected AggregationBuilder shallowCopy(AggregatorFactories.Builder factoriesBuilder, Map<String, Object> metadata) {
return new TTestAggregationBuilder(this, factoriesBuilder, metadata);
}
@Override
public BucketCardinality bucketCardinality() {
return BucketCardinality.NONE;
}
@Override
protected void innerWriteTo(StreamOutput out) throws IOException {
out.writeEnum(testType);
out.writeVInt(tails);
}
@Override
protected MultiValuesSourceAggregatorFactory<ValuesSource.Numeric> innerBuild(
QueryShardContext queryShardContext,
Map<String, ValuesSourceConfig<ValuesSource.Numeric>> configs,
DocValueFormat format,
AggregatorFactory parent,
AggregatorFactories.Builder subFactoriesBuilder) throws IOException {
return new TTestAggregatorFactory(name, configs, testType, tails, format, queryShardContext, parent, subFactoriesBuilder, metadata);
}
@Override
public XContentBuilder doXContentBody(XContentBuilder builder, ToXContent.Params params) throws IOException {
return builder;
}
@Override
public String getType() {
return NAME;
}
}


@@ -0,0 +1,67 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.analytics.ttest;
import org.apache.lucene.search.ScoreMode;
import org.elasticsearch.search.DocValueFormat;
import org.elasticsearch.search.aggregations.Aggregator;
import org.elasticsearch.search.aggregations.InternalAggregation;
import org.elasticsearch.search.aggregations.metrics.NumericMetricsAggregator;
import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator;
import org.elasticsearch.search.aggregations.support.MultiValuesSource;
import org.elasticsearch.search.internal.SearchContext;
import java.io.IOException;
import java.util.List;
import java.util.Map;
public abstract class TTestAggregator<T extends TTestState> extends NumericMetricsAggregator.SingleValue {
protected final MultiValuesSource.NumericMultiValuesSource valuesSources;
protected final int tails;
private DocValueFormat format;
TTestAggregator(String name, MultiValuesSource.NumericMultiValuesSource valuesSources, int tails, DocValueFormat format,
SearchContext context, Aggregator parent,
List<PipelineAggregator> pipelineAggregators, Map<String, Object> metadata) throws IOException {
super(name, context, parent, pipelineAggregators, metadata);
this.valuesSources = valuesSources;
this.tails = tails;
this.format = format;
}
@Override
public ScoreMode scoreMode() {
return valuesSources != null && valuesSources.needsScores() ? ScoreMode.COMPLETE : ScoreMode.COMPLETE_NO_SCORES;
}
protected abstract T getState(long bucket);
protected abstract T getEmptyState();
protected abstract long size();
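// Buckets that the collectors never grew to, as well as unmapped fields, fall
// back to the empty state, which reports NaN.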
@Override
public InternalAggregation buildAggregation(long bucket) {
if (valuesSources == null || bucket >= size()) {
return buildEmptyAggregation();
}
return new InternalTTest(name, getState(bucket), format, metadata());
}
@Override
public InternalAggregation buildEmptyAggregation() {
return new InternalTTest(name, getEmptyState(), format, metadata());
}
@Override
public double metric(long owningBucketOrd) {
return getState(owningBucketOrd).getValue();
}
}


@@ -0,0 +1,82 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.analytics.ttest;
import org.elasticsearch.index.query.QueryShardContext;
import org.elasticsearch.search.DocValueFormat;
import org.elasticsearch.search.aggregations.Aggregator;
import org.elasticsearch.search.aggregations.AggregatorFactories;
import org.elasticsearch.search.aggregations.AggregatorFactory;
import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator;
import org.elasticsearch.search.aggregations.support.MultiValuesSource;
import org.elasticsearch.search.aggregations.support.MultiValuesSourceAggregatorFactory;
import org.elasticsearch.search.aggregations.support.ValuesSource;
import org.elasticsearch.search.aggregations.support.ValuesSourceConfig;
import org.elasticsearch.search.internal.SearchContext;
import java.io.IOException;
import java.util.List;
import java.util.Map;
class TTestAggregatorFactory extends MultiValuesSourceAggregatorFactory<ValuesSource.Numeric> {
private final TTestType testType;
private final int tails;
TTestAggregatorFactory(String name, Map<String, ValuesSourceConfig<ValuesSource.Numeric>> configs, TTestType testType, int tails,
DocValueFormat format, QueryShardContext queryShardContext, AggregatorFactory parent,
AggregatorFactories.Builder subFactoriesBuilder,
Map<String, Object> metadata) throws IOException {
super(name, configs, format, queryShardContext, parent, subFactoriesBuilder, metadata);
this.testType = testType;
this.tails = tails;
}
@Override
protected Aggregator createUnmapped(SearchContext searchContext,
Aggregator parent,
List<PipelineAggregator> pipelineAggregators,
Map<String, Object> metadata) throws IOException {
switch (testType) {
case PAIRED:
return new PairedTTestAggregator(name, null, tails, format, searchContext, parent, pipelineAggregators, metadata);
case HOMOSCEDASTIC:
return new UnpairedTTestAggregator(name, null, tails, true, format, searchContext, parent, pipelineAggregators, metadata);
case HETEROSCEDASTIC:
return new UnpairedTTestAggregator(name, null, tails, false, format, searchContext, parent, pipelineAggregators, metadata);
default:
throw new IllegalArgumentException("Unsupported t-test type " + testType);
}
}
@Override
protected Aggregator doCreateInternal(SearchContext searchContext,
Map<String, ValuesSourceConfig<ValuesSource.Numeric>> configs,
DocValueFormat format,
Aggregator parent,
boolean collectsFromSingleBucket,
List<PipelineAggregator> pipelineAggregators,
Map<String, Object> metadata) throws IOException {
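// If neither field is mapped on this shard, fall back to the unmapped
// aggregator, which always produces the empty (NaN) result.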
MultiValuesSource.NumericMultiValuesSource numericMultiVS
= new MultiValuesSource.NumericMultiValuesSource(configs, queryShardContext);
if (numericMultiVS.areValuesSourcesEmpty()) {
return createUnmapped(searchContext, parent, pipelineAggregators, metadata);
}
switch (testType) {
case PAIRED:
return new PairedTTestAggregator(name, numericMultiVS, tails, format, searchContext, parent, pipelineAggregators, metadata);
case HOMOSCEDASTIC:
return new UnpairedTTestAggregator(name, numericMultiVS, tails, true, format, searchContext, parent, pipelineAggregators,
metadata);
case HETEROSCEDASTIC:
return new UnpairedTTestAggregator(name, numericMultiVS, tails, false, format, searchContext, parent, pipelineAggregators,
metadata);
default:
throw new IllegalArgumentException("Unsupported t-test type " + testType);
}
}
}


@@ -0,0 +1,20 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.analytics.ttest;
import org.elasticsearch.common.io.stream.NamedWriteable;
import java.util.stream.Stream;
/**
* Base class for t-test aggregation state
*/
public interface TTestState extends NamedWriteable {
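// The p-value for the statistics collected so far; NaN when there is not enough data.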
double getValue();
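// Merges the per-shard states into a single result state during the reduce phase.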
TTestState reduce(Stream<TTestState> states);
}


@@ -0,0 +1,85 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.analytics.ttest;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.io.stream.Writeable;
import org.elasticsearch.search.aggregations.metrics.CompensatedSum;
import java.io.IOException;
import java.util.Objects;
import java.util.function.Consumer;
/**
* Collects basic stats that are needed to perform t-test
*/
public class TTestStats implements Writeable {
public final long count;
public final double sum;
public final double sumOfSqrs;
public TTestStats(long count, double sum, double sumOfSqrs) {
this.count = count;
this.sum = sum;
this.sumOfSqrs = sumOfSqrs;
}
public TTestStats(StreamInput in) throws IOException {
count = in.readVLong();
sum = in.readDouble();
sumOfSqrs = in.readDouble();
}
@Override
public void writeTo(StreamOutput out) throws IOException {
out.writeVLong(count);
out.writeDouble(sum);
out.writeDouble(sumOfSqrs);
}
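// Sample variance via the sum-of-squares identity, clamped at zero because
// floating-point cancellation can produce a slightly negative result.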
public double variance() {
double v = (sumOfSqrs - ((sum * sum) / count)) / (count - 1);
return v < 0 ? 0 : v;
}
public double average() {
return sum / count;
}
public static class Reducer implements Consumer<TTestStats> {
private long count = 0;
CompensatedSum compSum = new CompensatedSum(0, 0);
CompensatedSum compSumOfSqrs = new CompensatedSum(0, 0);
@Override
public void accept(TTestStats stat) {
count += stat.count;
compSum.add(stat.sum);
compSumOfSqrs.add(stat.sumOfSqrs);
}
public TTestStats result() {
return new TTestStats(count, compSum.value(), compSumOfSqrs.value());
}
}
@Override
public boolean equals(Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
TTestStats that = (TTestStats) o;
return count == that.count &&
Double.compare(that.sum, sum) == 0 &&
Double.compare(that.sumOfSqrs, sumOfSqrs) == 0;
}
@Override
public int hashCode() {
return Objects.hash(count, sum, sumOfSqrs);
}
}


@@ -0,0 +1,78 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.analytics.ttest;
import org.elasticsearch.common.lease.Releasable;
import org.elasticsearch.common.lease.Releasables;
import org.elasticsearch.common.util.BigArrays;
import org.elasticsearch.common.util.DoubleArray;
import org.elasticsearch.common.util.LongArray;
import org.elasticsearch.search.aggregations.metrics.CompensatedSum;
public class TTestStatsBuilder implements Releasable {
private LongArray counts;
private DoubleArray sums;
private DoubleArray compensations;
private DoubleArray sumOfSqrs;
private DoubleArray sumOfSqrCompensations;
TTestStatsBuilder(BigArrays bigArrays) {
counts = bigArrays.newLongArray(1, true);
sums = bigArrays.newDoubleArray(1, true);
compensations = bigArrays.newDoubleArray(1, true);
sumOfSqrs = bigArrays.newDoubleArray(1, true);
sumOfSqrCompensations = bigArrays.newDoubleArray(1, true);
}
public TTestStats get(long bucket) {
return new TTestStats(counts.get(bucket), sums.get(bucket), sumOfSqrs.get(bucket));
}
public long build(long bucket) {
return counts.get(bucket);
}
public long getSize() {
return counts.size();
}
public void grow(BigArrays bigArrays, long buckets) {
if (buckets >= counts.size()) {
long overSize = BigArrays.overSize(buckets);
counts = bigArrays.resize(counts, overSize);
sums = bigArrays.resize(sums, overSize);
compensations = bigArrays.resize(compensations, overSize);
sumOfSqrs = bigArrays.resize(sumOfSqrs, overSize);
sumOfSqrCompensations = bigArrays.resize(sumOfSqrCompensations, overSize);
}
}
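// Compensated (Kahan) summation: the per-bucket compensation terms are loaded
// into the caller's scratch CompensatedSum objects so precision is preserved
// across many small increments.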
public void addValue(CompensatedSum compSum, CompensatedSum compSumOfSqr, long bucket, double val) {
counts.increment(bucket, 1);
double sum = sums.get(bucket);
double compensation = compensations.get(bucket);
compSum.reset(sum, compensation);
double sumOfSqr = sumOfSqrs.get(bucket);
double sumOfSqrCompensation = sumOfSqrCompensations.get(bucket);
compSumOfSqr.reset(sumOfSqr, sumOfSqrCompensation);
compSum.add(val);
compSumOfSqr.add(val * val);
sums.set(bucket, compSum.value());
compensations.set(bucket, compSum.delta());
sumOfSqrs.set(bucket, compSumOfSqr.value());
sumOfSqrCompensations.set(bucket, compSumOfSqr.delta());
}
@Override
public void close() {
Releasables.close(counts, sums, compensations, sumOfSqrs, sumOfSqrCompensations);
}
}

View File

@ -0,0 +1,25 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.analytics.ttest;
import java.util.Locale;
/**
 * T-test type: paired, unpaired with equal variance (homoscedastic),
 * or unpaired with unequal variance (heteroscedastic)
 */
public enum TTestType {
PAIRED, HOMOSCEDASTIC, HETEROSCEDASTIC;
public static TTestType resolve(String name) {
return TTestType.valueOf(name.toUpperCase(Locale.ROOT));
}
public String value() {
return name().toLowerCase(Locale.ROOT);
}
}

View File

@ -0,0 +1,99 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.analytics.ttest;
import org.apache.lucene.index.LeafReaderContext;
import org.elasticsearch.common.lease.Releasables;
import org.elasticsearch.common.util.BigArrays;
import org.elasticsearch.index.fielddata.SortedNumericDoubleValues;
import org.elasticsearch.search.DocValueFormat;
import org.elasticsearch.search.aggregations.Aggregator;
import org.elasticsearch.search.aggregations.LeafBucketCollector;
import org.elasticsearch.search.aggregations.LeafBucketCollectorBase;
import org.elasticsearch.search.aggregations.metrics.CompensatedSum;
import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator;
import org.elasticsearch.search.aggregations.support.MultiValuesSource;
import org.elasticsearch.search.internal.SearchContext;
import java.io.IOException;
import java.util.List;
import java.util.Map;
import static org.elasticsearch.xpack.analytics.ttest.TTestAggregationBuilder.A_FIELD;
import static org.elasticsearch.xpack.analytics.ttest.TTestAggregationBuilder.B_FIELD;
public class UnpairedTTestAggregator extends TTestAggregator<UnpairedTTestState> {
private final TTestStatsBuilder a;
private final TTestStatsBuilder b;
private final boolean homoscedastic;
UnpairedTTestAggregator(String name, MultiValuesSource.NumericMultiValuesSource valuesSources, int tails, boolean homoscedastic,
DocValueFormat format, SearchContext context, Aggregator parent, List<PipelineAggregator> pipelineAggregators,
Map<String, Object> metadata) throws IOException {
super(name, valuesSources, tails, format, context, parent, pipelineAggregators, metadata);
BigArrays bigArrays = context.bigArrays();
a = new TTestStatsBuilder(bigArrays);
b = new TTestStatsBuilder(bigArrays);
this.homoscedastic = homoscedastic;
}
@Override
protected UnpairedTTestState getState(long bucket) {
return new UnpairedTTestState(a.get(bucket), b.get(bucket), homoscedastic, tails);
}
@Override
protected UnpairedTTestState getEmptyState() {
return new UnpairedTTestState(new TTestStats(0, 0, 0), new TTestStats(0, 0, 0), homoscedastic, tails);
}
@Override
protected long size() {
return a.getSize();
}
@Override
public LeafBucketCollector getLeafCollector(LeafReaderContext ctx,
final LeafBucketCollector sub) throws IOException {
if (valuesSources == null) {
return LeafBucketCollector.NO_OP_COLLECTOR;
}
final BigArrays bigArrays = context.bigArrays();
final SortedNumericDoubleValues docAValues = valuesSources.getField(A_FIELD.getPreferredName(), ctx);
final SortedNumericDoubleValues docBValues = valuesSources.getField(B_FIELD.getPreferredName(), ctx);
final CompensatedSum compSumA = new CompensatedSum(0, 0);
final CompensatedSum compSumOfSqrA = new CompensatedSum(0, 0);
final CompensatedSum compSumB = new CompensatedSum(0, 0);
final CompensatedSum compSumOfSqrB = new CompensatedSum(0, 0);
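// The CompensatedSum objects above are per-segment scratch space reused for every
// document; the durable per-bucket state lives in the two TTestStatsBuilder instances.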
return new LeafBucketCollectorBase(sub, docAValues) {
private void processValues(int doc, long bucket, SortedNumericDoubleValues docValues, CompensatedSum compSum,
CompensatedSum compSumOfSqr, TTestStatsBuilder builder) throws IOException {
if (docValues.advanceExact(doc)) {
final int numValues = docValues.docValueCount();
for (int i = 0; i < numValues; i++) {
builder.addValue(compSum, compSumOfSqr, bucket, docValues.nextValue());
}
}
}
@Override
public void collect(int doc, long bucket) throws IOException {
a.grow(bigArrays, bucket + 1);
b.grow(bigArrays, bucket + 1);
processValues(doc, bucket, docAValues, compSumA, compSumOfSqrA, a);
processValues(doc, bucket, docBValues, compSumB, compSumOfSqrB, b);
}
};
}
@Override
public void doClose() {
Releasables.close(a, b);
}
}

View File

@ -0,0 +1,120 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.analytics.ttest;
import org.apache.commons.math3.distribution.TDistribution;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import java.io.IOException;
import java.util.Objects;
import java.util.stream.Stream;
public class UnpairedTTestState implements TTestState {
public static final String NAME = "U";
private final TTestStats a;
private final TTestStats b;
private boolean homoscedastic;
private int tails;
public UnpairedTTestState(TTestStats a, TTestStats b, boolean homoscedastic, int tails) {
this.a = a;
this.b = b;
this.homoscedastic = homoscedastic;
this.tails = tails;
}
public UnpairedTTestState(StreamInput in) throws IOException {
a = new TTestStats(in);
b = new TTestStats(in);
homoscedastic = in.readBoolean();
tails = in.readVInt();
}
@Override
public double getValue() {
if (a.count < 2 || b.count < 2) {
return Double.NaN;
}
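// Homoscedastic: Student's t-test with pooled variance and a.count + b.count - 2
// degrees of freedom. Heteroscedastic: Welch's t-test, with degrees of freedom from
// the Welch-Satterthwaite approximation.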
if (homoscedastic) {
long n = a.count + b.count - 2;
double variance = ((a.count - 1) * a.variance() + (b.count - 1) * b.variance()) / n;
double nn = (1.0 / a.count + 1.0 / b.count);
return p(variance * nn, n);
} else {
double s2an = a.variance() / a.count;
double s2bn = b.variance() / b.count;
double variance = s2an + s2bn;
double degreeOfFreedom = variance * variance / (s2an * s2an / (a.count - 1) + s2bn * s2bn / (b.count - 1));
return p(variance, degreeOfFreedom);
}
}
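// Turns the squared standard error and the degrees of freedom into a p-value:
// t = |mean(a) - mean(b)| / sqrt(sd2), p = tails * P(T <= -|t|).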
private double p(double sd2, double degreesOfFreedom) {
if (degreesOfFreedom < 0) {
return Double.NaN;
}
double sd = Math.sqrt(sd2);
double meanDiff = a.average() - b.average();
double t = Math.abs(meanDiff / sd);
TDistribution dist = new TDistribution(degreesOfFreedom);
return dist.cumulativeProbability(-t) * tails;
}
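// Reduce merges the partial a/b moments from every shard; the tails and
// homoscedastic settings must agree across shards, otherwise the reduce fails fast.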
@Override
public TTestState reduce(Stream<TTestState> states) {
TTestStats.Reducer reducerA = new TTestStats.Reducer();
TTestStats.Reducer reducerB = new TTestStats.Reducer();
states.forEach(tTestState -> {
UnpairedTTestState state = (UnpairedTTestState) tTestState;
if (state.homoscedastic != homoscedastic) {
throw new IllegalStateException("Incompatible homoscedastic mode in the reduce. Expected "
+ state.homoscedastic + " reduced with " + homoscedastic);
}
if (state.tails != tails) {
throw new IllegalStateException("Incompatible tails value in the reduce. Expected "
+ state.tails + " reduced with " + tails);
}
reducerA.accept(state.a);
reducerB.accept(state.b);
});
return new UnpairedTTestState(reducerA.result(), reducerB.result(), homoscedastic, tails);
}
@Override
public void writeTo(StreamOutput out) throws IOException {
a.writeTo(out);
b.writeTo(out);
out.writeBoolean(homoscedastic);
out.writeVInt(tails);
}
@Override
public String getWriteableName() {
return NAME;
}
@Override
public boolean equals(Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
UnpairedTTestState that = (UnpairedTTestState) o;
return homoscedastic == that.homoscedastic &&
tails == that.tails &&
a.equals(that.a) &&
b.equals(that.b);
}
@Override
public int hashCode() {
return Objects.hash(a, b, homoscedastic, tails);
}
}

View File

@ -58,20 +58,21 @@ public class TransportAnalyticsStatsActionTests extends ESTestCase {
}
public void test() throws IOException {
for (AnalyticsUsage.Item item : AnalyticsUsage.Item.values()) {
AnalyticsUsage realUsage = new AnalyticsUsage();
AnalyticsUsage emptyUsage = new AnalyticsUsage();
ContextParser<Void, Void> parser = realUsage.track(item, (p, c) -> c);
ObjectPath unused = run(realUsage, emptyUsage);
assertThat(unused.evaluate("stats.0." + item.name().toLowerCase(Locale.ROOT) + "_usage"), equalTo(0));
assertThat(unused.evaluate("stats.1." + item.name().toLowerCase(Locale.ROOT) + "_usage"), equalTo(0));
int count = between(1, 10000);
for (int i = 0; i < count; i++) {
assertNull(parser.parse(null, null));
}
ObjectPath used = run(realUsage, emptyUsage);
assertThat(item.name(), used.evaluate("stats.0." + item.name().toLowerCase(Locale.ROOT) + "_usage"), equalTo(count));
assertThat(item.name(), used.evaluate("stats.1." + item.name().toLowerCase(Locale.ROOT) + "_usage"), equalTo(0));
}
}
private ObjectPath run(AnalyticsUsage... nodeUsages) throws IOException {

View File

@ -0,0 +1,131 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.analytics.ttest;
import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.io.stream.BytesStreamOutput;
import org.elasticsearch.common.io.stream.NamedWriteableAwareStreamInput;
import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.Writeable;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.NamedXContentRegistry;
import org.elasticsearch.search.DocValueFormat;
import org.elasticsearch.search.SearchModule;
import org.elasticsearch.search.aggregations.Aggregation;
import org.elasticsearch.search.aggregations.ParsedAggregation;
import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator;
import org.elasticsearch.test.InternalAggregationTestCase;
import org.elasticsearch.xpack.analytics.AnalyticsPlugin;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import static java.util.Collections.emptyList;
public class InternalTTestTests extends InternalAggregationTestCase<InternalTTest> {
private TTestType type = randomFrom(TTestType.values());
private int tails = randomIntBetween(1, 2);
@Override
protected InternalTTest createTestInstance(String name, Map<String, Object> metadata) {
TTestState state = randomState();
DocValueFormat formatter = randomNumericDocValueFormat();
return new InternalTTest(name, state, formatter, metadata);
}
private TTestState randomState() {
if (type == TTestType.PAIRED) {
return new PairedTTestState(randomStats(), tails);
} else {
return new UnpairedTTestState(randomStats(), randomStats(), type == TTestType.HOMOSCEDASTIC, tails);
}
}
private TTestStats randomStats() {
return new TTestStats(randomNonNegativeLong(), randomDouble(), randomDouble());
}
@Override
protected Writeable.Reader<InternalTTest> instanceReader() {
return InternalTTest::new;
}
@Override
protected void assertReduced(InternalTTest reduced, List<InternalTTest> inputs) {
TTestState expected = reduced.state.reduce(inputs.stream().map(a -> a.state));
assertNotNull(expected);
assertEquals(expected.getValue(), reduced.getValue(), 0.00001);
}
@Override
protected void assertFromXContent(InternalTTest tTest, ParsedAggregation parsedAggregation) {
// There is no ParsedTTest yet so we cannot test it here
}
@Override
protected InternalTTest mutateInstance(InternalTTest instance) {
String name = instance.getName();
TTestState state;
try (BytesStreamOutput output = new BytesStreamOutput()) {
output.writeNamedWriteable(instance.state);
try (StreamInput in = new NamedWriteableAwareStreamInput(output.bytes().streamInput(), getNamedWriteableRegistry())) {
state = in.readNamedWriteable(TTestState.class);
}
} catch (IOException ex) {
throw new IllegalStateException(ex);
}
DocValueFormat formatter = instance.format();
List<PipelineAggregator> pipelineAggregators = instance.pipelineAggregators();
Map<String, Object> metadata = instance.getMetadata();
switch (between(0, 2)) {
case 0:
name += randomAlphaOfLength(5);
break;
case 1:
state = randomState();
break;
case 2:
if (metadata == null) {
metadata = new HashMap<>(1);
} else {
metadata = new HashMap<>(instance.getMetadata());
}
metadata.put(randomAlphaOfLength(15), randomInt());
break;
default:
throw new AssertionError("Illegal randomisation branch");
}
return new InternalTTest(name, state, formatter, metadata);
}
@Override
protected List<NamedXContentRegistry.Entry> getNamedXContents() {
List<NamedXContentRegistry.Entry> extendedNamedXContents = new ArrayList<>(super.getNamedXContents());
extendedNamedXContents.add(new NamedXContentRegistry.Entry(Aggregation.class,
new ParseField(TTestAggregationBuilder.NAME),
(p, c) -> {
assumeTrue("There is no ParsedTTest yet", false);
return null;
}
));
return extendedNamedXContents;
}
@Override
protected NamedWriteableRegistry getNamedWriteableRegistry() {
List<NamedWriteableRegistry.Entry> entries = new ArrayList<>();
entries.addAll(new SearchModule(Settings.EMPTY, false, emptyList()).getNamedWriteables());
entries.addAll(new AnalyticsPlugin(Settings.EMPTY).getNamedWriteables());
return new NamedWriteableRegistry(entries);
}
}

View File

@ -0,0 +1,84 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.analytics.ttest;
import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.io.stream.Writeable;
import org.elasticsearch.common.xcontent.NamedXContentRegistry;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.script.Script;
import org.elasticsearch.search.aggregations.AggregatorFactories;
import org.elasticsearch.search.aggregations.BaseAggregationBuilder;
import org.elasticsearch.search.aggregations.support.MultiValuesSourceFieldConfig;
import org.elasticsearch.test.AbstractSerializingTestCase;
import org.junit.Before;
import java.io.IOException;
import static java.util.Collections.singletonList;
import static org.hamcrest.Matchers.hasSize;
public class TTestAggregationBuilderTests extends AbstractSerializingTestCase<TTestAggregationBuilder> {
String aggregationName;
@Before
public void setupName() {
aggregationName = randomAlphaOfLength(10);
}
@Override
protected NamedXContentRegistry xContentRegistry() {
return new NamedXContentRegistry(singletonList(new NamedXContentRegistry.Entry(
BaseAggregationBuilder.class,
new ParseField(TTestAggregationBuilder.NAME),
(p, n) -> TTestAggregationBuilder.PARSER.apply(p, (String) n))));
}
@Override
protected TTestAggregationBuilder doParseInstance(XContentParser parser) throws IOException {
assertSame(XContentParser.Token.START_OBJECT, parser.nextToken());
AggregatorFactories.Builder parsed = AggregatorFactories.parseAggregators(parser);
assertThat(parsed.getAggregatorFactories(), hasSize(1));
assertThat(parsed.getPipelineAggregatorFactories(), hasSize(0));
TTestAggregationBuilder agg = (TTestAggregationBuilder) parsed.getAggregatorFactories().iterator().next();
assertNull(parser.nextToken());
assertNotNull(agg);
return agg;
}
@Override
protected TTestAggregationBuilder createTestInstance() {
MultiValuesSourceFieldConfig aConfig;
if (randomBoolean()) {
aConfig = new MultiValuesSourceFieldConfig.Builder().setFieldName("a_field").build();
} else {
aConfig = new MultiValuesSourceFieldConfig.Builder().setScript(new Script(randomAlphaOfLength(10))).build();
}
MultiValuesSourceFieldConfig bConfig;
if (randomBoolean()) {
bConfig = new MultiValuesSourceFieldConfig.Builder().setFieldName("b_field").build();
} else {
bConfig = new MultiValuesSourceFieldConfig.Builder().setScript(new Script(randomAlphaOfLength(10))).build();
}
TTestAggregationBuilder aggregationBuilder = new TTestAggregationBuilder(aggregationName)
.a(aConfig)
.b(bConfig);
if (randomBoolean()) {
aggregationBuilder.tails(randomIntBetween(1, 2));
}
if (randomBoolean()) {
aggregationBuilder.testType(randomFrom(TTestType.values()));
}
return aggregationBuilder;
}
@Override
protected Writeable.Reader<TTestAggregationBuilder> instanceReader() {
return TTestAggregationBuilder::new;
}
}
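The tests above exercise the whole builder surface. As a quick orientation, here is a
minimal sketch of assembling the same aggregation by hand; the builder methods a(),
b(), testType() and tails() are the ones shown in the tests, while the field and
variable names are illustrative only:
// A usage sketch, not part of this commit: configure a paired t-test over two
// numeric fields with an explicit two-sided p-value.
MultiValuesSourceFieldConfig before = new MultiValuesSourceFieldConfig.Builder()
    .setFieldName("startup_time_before").build();
MultiValuesSourceFieldConfig after = new MultiValuesSourceFieldConfig.Builder()
    .setFieldName("startup_time_after").build();
TTestAggregationBuilder tTest = new TTestAggregationBuilder("startup_time_ttest")
    .a(before)
    .b(after)
    .testType(TTestType.PAIRED)
    .tails(2); // 2 is also the default when tails() is omitted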

View File

@ -0,0 +1,550 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.analytics.ttest;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedNumericDocValuesField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.Directory;
import org.elasticsearch.common.CheckedConsumer;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.fielddata.ScriptDocValues;
import org.elasticsearch.index.mapper.KeywordFieldMapper;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.NumberFieldMapper;
import org.elasticsearch.script.MockScriptEngine;
import org.elasticsearch.script.Script;
import org.elasticsearch.script.ScriptEngine;
import org.elasticsearch.script.ScriptModule;
import org.elasticsearch.script.ScriptService;
import org.elasticsearch.script.ScriptType;
import org.elasticsearch.search.aggregations.AggregationBuilder;
import org.elasticsearch.search.aggregations.AggregationExecutionException;
import org.elasticsearch.search.aggregations.AggregatorTestCase;
import org.elasticsearch.search.aggregations.InternalAggregation;
import org.elasticsearch.search.aggregations.bucket.global.GlobalAggregationBuilder;
import org.elasticsearch.search.aggregations.bucket.global.InternalGlobal;
import org.elasticsearch.search.aggregations.bucket.histogram.HistogramAggregationBuilder;
import org.elasticsearch.search.aggregations.bucket.histogram.InternalHistogram;
import org.elasticsearch.search.aggregations.support.AggregationInspectionHelper;
import org.elasticsearch.search.aggregations.support.MultiValuesSourceFieldConfig;
import org.elasticsearch.search.lookup.LeafDocLookup;
import java.io.IOException;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.function.Consumer;
import java.util.function.Function;
import static java.util.Arrays.asList;
import static java.util.Collections.singleton;
public class TTestAggregatorTests extends AggregatorTestCase {
/**
 * Script that loads {@code fieldname} from doc values and returns its value plus 0.5,
 * nudging values equal to 1 by 1e-7 so that paired differences are not all identical
 * (which would make the t-statistic undefined).
 */
public static final String ADD_HALF_SCRIPT = "add_one";
@Override
protected AggregationBuilder createAggBuilderForTypeTest(MappedFieldType fieldType, String fieldName) {
return new TTestAggregationBuilder("foo")
.a(new MultiValuesSourceFieldConfig.Builder().setFieldName(fieldName).build())
.b(new MultiValuesSourceFieldConfig.Builder().setFieldName(fieldName).build());
}
@Override
protected ScriptService getMockScriptService() {
Map<String, Function<Map<String, Object>, Object>> scripts = new HashMap<>();
scripts.put(ADD_HALF_SCRIPT, vars -> {
LeafDocLookup leafDocLookup = (LeafDocLookup) vars.get("doc");
String fieldname = (String) vars.get("fieldname");
ScriptDocValues<?> scriptDocValues = leafDocLookup.get(fieldname);
double val = ((Number) scriptDocValues.get(0)).doubleValue();
if (val == 1) {
val += 0.0000001;
}
return val + 0.5;
});
MockScriptEngine scriptEngine = new MockScriptEngine(MockScriptEngine.NAME,
scripts,
Collections.emptyMap());
Map<String, ScriptEngine> engines = Collections.singletonMap(scriptEngine.getType(), scriptEngine);
return new ScriptService(Settings.EMPTY, engines, ScriptModule.CORE_CONTEXTS);
}
public void testNoMatchingField() throws IOException {
testCase(new MatchAllDocsQuery(), randomFrom(TTestType.values()), iw -> {
iw.addDocument(asList(new NumericDocValuesField("wrong_a", 102), new NumericDocValuesField("wrong_b", 89)));
iw.addDocument(asList(new NumericDocValuesField("wrong_a", 99), new NumericDocValuesField("wrong_b", 93)));
}, tTest -> assertEquals(Double.NaN, tTest.getValue(), 0));
}
public void testNotEnoughRecords() throws IOException {
testCase(new MatchAllDocsQuery(), randomFrom(TTestType.values()), iw -> {
iw.addDocument(asList(new NumericDocValuesField("a", 102), new NumericDocValuesField("b", 89)));
}, tTest -> assertEquals(Double.NaN, tTest.getValue(), 0));
}
public void testSameValues() throws IOException {
TTestType tTestType = randomFrom(TTestType.values());
testCase(new MatchAllDocsQuery(), tTestType, iw -> {
iw.addDocument(asList(new NumericDocValuesField("a", 102), new NumericDocValuesField("b", 102)));
iw.addDocument(asList(new NumericDocValuesField("a", 99), new NumericDocValuesField("b", 99)));
iw.addDocument(asList(new NumericDocValuesField("a", 111), new NumericDocValuesField("b", 111)));
iw.addDocument(asList(new NumericDocValuesField("a", 97), new NumericDocValuesField("b", 97)));
iw.addDocument(asList(new NumericDocValuesField("a", 101), new NumericDocValuesField("b", 101)));
}, tTest -> assertEquals(tTestType == TTestType.PAIRED ? Double.NaN : 1, tTest.getValue(), 0));
}
public void testMatchesSortedNumericDocValues() throws IOException {
testCase(new MatchAllDocsQuery(), TTestType.PAIRED, iw -> {
iw.addDocument(asList(new SortedNumericDocValuesField("a", 102), new SortedNumericDocValuesField("b", 89)));
iw.addDocument(asList(new SortedNumericDocValuesField("a", 99), new SortedNumericDocValuesField("b", 93)));
iw.addDocument(asList(new SortedNumericDocValuesField("a", 111), new SortedNumericDocValuesField("b", 72)));
iw.addDocument(asList(new SortedNumericDocValuesField("a", 97), new SortedNumericDocValuesField("b", 98)));
iw.addDocument(asList(new SortedNumericDocValuesField("a", 101), new SortedNumericDocValuesField("b", 102)));
iw.addDocument(asList(new SortedNumericDocValuesField("a", 99), new SortedNumericDocValuesField("b", 98)));
}, tTest -> assertEquals(0.09571844217 * 2, tTest.getValue(), 0.000001));
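// Where the expected value comes from: paired differences d = {13, 6, 39, -1, -1, 1},
// mean(d) = 9.5, var(d) = 237.5, t = 9.5 / sqrt(237.5 / 6) ~= 1.51 with df = 5; the
// one-tailed p-value is P(T <= -1.51) ~= 0.09571844217, doubled for the default two tails.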
}
public void testMultiplePairedValues() {
AggregationExecutionException ex = expectThrows(AggregationExecutionException.class, () ->
testCase(new MatchAllDocsQuery(), TTestType.PAIRED, iw -> {
iw.addDocument(asList(new SortedNumericDocValuesField("a", 102), new SortedNumericDocValuesField("a", 103),
new SortedNumericDocValuesField("b", 89)));
iw.addDocument(asList(new SortedNumericDocValuesField("a", 99), new SortedNumericDocValuesField("b", 93)));
}, tTest -> fail("Should have thrown exception"))
);
assertEquals(
"Encountered more than one value for a single document. Use a script to combine multiple values per doc into a single value.",
ex.getMessage());
}
public void testMultipleUnpairedValues() throws IOException {
TTestType tTestType = randomFrom(TTestType.HETEROSCEDASTIC, TTestType.HOMOSCEDASTIC);
testCase(new MatchAllDocsQuery(), tTestType, iw -> {
iw.addDocument(asList(new SortedNumericDocValuesField("a", 102), new SortedNumericDocValuesField("a", 103),
new SortedNumericDocValuesField("b", 89)));
iw.addDocument(asList(new SortedNumericDocValuesField("a", 99), new SortedNumericDocValuesField("b", 93)));
}, tTest -> assertEquals(tTestType == TTestType.HETEROSCEDASTIC ? 0.0607303911 : 0.01718374671, tTest.getValue(), 0.000001));
}
public void testMissingValues() throws IOException {
TTestType tTestType = randomFrom(TTestType.values());
testCase(new MatchAllDocsQuery(), tTestType, iw -> {
iw.addDocument(asList(new SortedNumericDocValuesField("a", 102), new SortedNumericDocValuesField("b", 89)));
iw.addDocument(asList(new SortedNumericDocValuesField("a1", 99), new SortedNumericDocValuesField("b", 93)));
iw.addDocument(asList(new SortedNumericDocValuesField("a", 111), new SortedNumericDocValuesField("b1", 72)));
iw.addDocument(asList(new SortedNumericDocValuesField("a", 97), new SortedNumericDocValuesField("b", 98)));
iw.addDocument(asList(new SortedNumericDocValuesField("a", 101), new SortedNumericDocValuesField("b", 102)));
iw.addDocument(asList(new SortedNumericDocValuesField("a", 99), new SortedNumericDocValuesField("b", 98)));
}, tTest -> {
switch (tTestType) {
case PAIRED:
assertEquals(0.4385093524, tTest.getValue(), 0.000001);
break;
case HOMOSCEDASTIC:
assertEquals(0.1066843841, tTest.getValue(), 0.000001);
break;
case HETEROSCEDASTIC:
assertEquals(0.1068382282, tTest.getValue(), 0.000001);
break;
default:
fail("unknown t-test type " + tTestType);
}
});
}
public void testUnmappedWithMissingField() throws IOException {
TTestType tTestType = randomFrom(TTestType.values());
boolean missA = randomBoolean();
boolean missB = missA == false || randomBoolean(); // at least one of the fields should be missing
MappedFieldType fieldType1 = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.INTEGER);
fieldType1.setName(missA ? "not_a" : "a");
MappedFieldType fieldType2 = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.INTEGER);
fieldType2.setName(missB ? "not_b" : "b");
TTestAggregationBuilder aggregationBuilder = new TTestAggregationBuilder("t_test")
.a(new MultiValuesSourceFieldConfig.Builder().setFieldName("a").setMissing(100).build())
.b(new MultiValuesSourceFieldConfig.Builder().setFieldName("b").setMissing(100).build())
.testType(tTestType);
testCase(aggregationBuilder, new MatchAllDocsQuery(), iw -> {
iw.addDocument(asList(new NumericDocValuesField("a", 102), new NumericDocValuesField("b", 89)));
iw.addDocument(asList(new NumericDocValuesField("a", 99), new NumericDocValuesField("b", 93)));
}, (Consumer<InternalTTest>) tTest -> {
if (missA && missB) {
assertEquals(Double.NaN, tTest.getValue(), 0);
} else {
if (missA) {
switch (tTestType) {
case PAIRED:
assertEquals(0.1392089745, tTest.getValue(), 0.000001);
break;
case HOMOSCEDASTIC:
assertEquals(0.04600190799, tTest.getValue(), 0.000001);
break;
case HETEROSCEDASTIC:
assertEquals(0.1392089745, tTest.getValue(), 0.000001);
break;
default:
fail("unknown t-test type " + tTestType);
}
} else {
switch (tTestType) {
case PAIRED:
assertEquals(0.7951672353, tTest.getValue(), 0.000001);
break;
case HOMOSCEDASTIC:
assertEquals(0.7705842661, tTest.getValue(), 0.000001);
break;
case HETEROSCEDASTIC:
assertEquals(0.7951672353, tTest.getValue(), 0.000001);
break;
default:
fail("unknown t-test type " + tTestType);
}
}
}
}, fieldType1, fieldType2);
}
public void testUnsupportedType() {
TTestType tTestType = randomFrom(TTestType.values());
boolean wrongA = randomBoolean();
boolean wrongB = wrongA == false || randomBoolean(); // at least one of the fields should have an unsupported type
MappedFieldType fieldType1;
if (wrongA) {
fieldType1 = new KeywordFieldMapper.KeywordFieldType();
fieldType1.setHasDocValues(true);
} else {
fieldType1 = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.INTEGER);
}
fieldType1.setName("a");
MappedFieldType fieldType2;
if (wrongB) {
fieldType2 = new KeywordFieldMapper.KeywordFieldType();
fieldType2.setHasDocValues(true);
} else {
fieldType2 = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.INTEGER);
}
fieldType2.setName("b");
TTestAggregationBuilder aggregationBuilder = new TTestAggregationBuilder("t_test")
.a(new MultiValuesSourceFieldConfig.Builder().setFieldName("a").build())
.b(new MultiValuesSourceFieldConfig.Builder().setFieldName("b").build())
.testType(tTestType);
IllegalArgumentException ex = expectThrows(IllegalArgumentException.class, () ->
testCase(aggregationBuilder, new MatchAllDocsQuery(), iw -> {
iw.addDocument(asList(new SortedNumericDocValuesField("a", 102), new SortedNumericDocValuesField("a", 103),
new SortedNumericDocValuesField("b", 89)));
iw.addDocument(asList(new SortedNumericDocValuesField("a", 99), new SortedNumericDocValuesField("b", 93)));
}, tTest -> fail("Should have thrown exception"), fieldType1, fieldType2)
);
assertEquals(
"Expected numeric type on field [" + (wrongA ? "a" : "b") + "], but got [keyword]",
ex.getMessage());
}
public void testBadMissingField() {
TTestType tTestType = randomFrom(TTestType.values());
boolean missA = randomBoolean();
boolean missB = missA == false || randomBoolean(); // at least one of the fields should have a bad missing value
MappedFieldType fieldType1 = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.INTEGER);
fieldType1.setName("a");
MultiValuesSourceFieldConfig.Builder a = new MultiValuesSourceFieldConfig.Builder().setFieldName("a");
if (missA) {
a.setMissing("bad_number");
}
MappedFieldType fieldType2 = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.INTEGER);
fieldType2.setName("b");
MultiValuesSourceFieldConfig.Builder b = new MultiValuesSourceFieldConfig.Builder().setFieldName("b");
if (missB) {
b.setMissing("bad_number");
}
TTestAggregationBuilder aggregationBuilder = new TTestAggregationBuilder("t_test").a(a.build()).b(b.build()).testType(tTestType);
NumberFormatException ex = expectThrows(NumberFormatException.class, () ->
testCase(aggregationBuilder, new MatchAllDocsQuery(), iw -> {
iw.addDocument(asList(new SortedNumericDocValuesField("a", 102), new SortedNumericDocValuesField("b", 89)));
iw.addDocument(asList(new SortedNumericDocValuesField("a", 99), new SortedNumericDocValuesField("b", 93)));
}, tTest -> fail("Should have thrown exception"), fieldType1, fieldType2)
);
assertEquals("For input string: \"bad_number\"", ex.getMessage());
}
public void testUnmappedWithBadMissingField() {
TTestType tTestType = randomFrom(TTestType.values());
boolean missA = randomBoolean();
boolean missB = missA == false || randomBoolean(); // at least one of the fields should have a bad missing value
MappedFieldType fieldType1 = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.INTEGER);
fieldType1.setName("a");
MultiValuesSourceFieldConfig.Builder a = new MultiValuesSourceFieldConfig.Builder();
if (missA) {
a.setFieldName("not_a").setMissing("bad_number");
} else {
a.setFieldName("a");
}
MappedFieldType fieldType2 = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.INTEGER);
MultiValuesSourceFieldConfig.Builder b = new MultiValuesSourceFieldConfig.Builder();
if (missB) {
b.setFieldName("not_b").setMissing("bad_number");
} else {
b.setFieldName("b");
}
TTestAggregationBuilder aggregationBuilder = new TTestAggregationBuilder("t_test").a(a.build()).b(b.build()).testType(tTestType);
NumberFormatException ex = expectThrows(NumberFormatException.class, () ->
testCase(aggregationBuilder, new MatchAllDocsQuery(), iw -> {
iw.addDocument(asList(new SortedNumericDocValuesField("a", 102), new SortedNumericDocValuesField("b", 89)));
iw.addDocument(asList(new SortedNumericDocValuesField("a", 99), new SortedNumericDocValuesField("b", 93)));
}, tTest -> fail("Should have thrown exception"), fieldType1, fieldType2)
);
assertEquals("For input string: \"bad_number\"", ex.getMessage());
}
public void testEmptyBucket() throws IOException {
TTestType tTestType = randomFrom(TTestType.values());
MappedFieldType fieldType1 = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.INTEGER);
fieldType1.setName("a");
MappedFieldType fieldType2 = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.INTEGER);
fieldType2.setName("b");
MappedFieldType fieldTypePart = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.INTEGER);
fieldTypePart.setName("part");
HistogramAggregationBuilder histogram = new HistogramAggregationBuilder("histo").field("part").interval(10).minDocCount(0)
.subAggregation(new TTestAggregationBuilder("t_test")
.a(new MultiValuesSourceFieldConfig.Builder().setFieldName("a").build())
.b(new MultiValuesSourceFieldConfig.Builder().setFieldName("b").build())
.testType(tTestType));
testCase(histogram, new MatchAllDocsQuery(), iw -> {
iw.addDocument(asList(new NumericDocValuesField("a", 102), new NumericDocValuesField("b", 89),
new NumericDocValuesField("part", 1)));
iw.addDocument(asList(new NumericDocValuesField("a", 99), new NumericDocValuesField("b", 93),
new NumericDocValuesField("part", 1)));
iw.addDocument(asList(new NumericDocValuesField("a", 111), new NumericDocValuesField("b", 72),
new NumericDocValuesField("part", 1)));
iw.addDocument(asList(new NumericDocValuesField("a", 97), new NumericDocValuesField("b", 98),
new NumericDocValuesField("part", 21)));
iw.addDocument(asList(new NumericDocValuesField("a", 101), new NumericDocValuesField("b", 102),
new NumericDocValuesField("part", 21)));
iw.addDocument(asList(new NumericDocValuesField("a", 99), new NumericDocValuesField("b", 98),
new NumericDocValuesField("part", 21)));
}, (Consumer<InternalHistogram>) histo -> {
assertEquals(3, histo.getBuckets().size());
assertNotNull(histo.getBuckets().get(0).getAggregations().asMap().get("t_test"));
InternalTTest tTest = (InternalTTest) histo.getBuckets().get(0).getAggregations().asMap().get("t_test");
assertEquals(tTestType == TTestType.PAIRED ? 0.1939778614 :
tTestType == TTestType.HOMOSCEDASTIC ? 0.05878871029 : 0.07529006595, tTest.getValue(), 0.000001);
assertNotNull(histo.getBuckets().get(1).getAggregations().asMap().get("t_test"));
tTest = (InternalTTest) histo.getBuckets().get(1).getAggregations().asMap().get("t_test");
assertEquals(Double.NaN, tTest.getValue(), 0.000001);
assertNotNull(histo.getBuckets().get(2).getAggregations().asMap().get("t_test"));
tTest = (InternalTTest) histo.getBuckets().get(2).getAggregations().asMap().get("t_test");
assertEquals(tTestType == TTestType.PAIRED ? 0.6666666667 :
tTestType == TTestType.HOMOSCEDASTIC ? 0.8593081179 : 0.8594865044, tTest.getValue(), 0.000001);
}, fieldType1, fieldType2, fieldTypePart);
}
@AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/54365")
public void testFormatter() throws IOException {
TTestType tTestType = randomFrom(TTestType.values());
MappedFieldType fieldType1 = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.INTEGER);
fieldType1.setName("a");
MappedFieldType fieldType2 = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.INTEGER);
fieldType2.setName("b");
TTestAggregationBuilder aggregationBuilder = new TTestAggregationBuilder("t_test")
.a(new MultiValuesSourceFieldConfig.Builder().setFieldName("a").build())
.b(new MultiValuesSourceFieldConfig.Builder().setFieldName("b").build())
.testType(tTestType).format("0.00%");
testCase(aggregationBuilder, new MatchAllDocsQuery(), iw -> {
iw.addDocument(asList(new NumericDocValuesField("a", 102), new NumericDocValuesField("b", 89)));
iw.addDocument(asList(new NumericDocValuesField("a", 99), new NumericDocValuesField("b", 93)));
iw.addDocument(asList(new NumericDocValuesField("a", 111), new NumericDocValuesField("b", 72)));
}, (Consumer<InternalTTest>) tTest -> {
assertEquals(tTestType == TTestType.PAIRED ? 0.1939778614 :
tTestType == TTestType.HOMOSCEDASTIC ? 0.05878871029 : 0.07529006595, tTest.getValue(), 0.000001);
assertEquals(tTestType == TTestType.PAIRED ? "19.40%" :
tTestType == TTestType.HOMOSCEDASTIC ? "5.88%" : "7.53%", tTest.getValueAsString());
}, fieldType1, fieldType2);
}
public void testGetProperty() throws IOException {
MappedFieldType fieldType1 = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.INTEGER);
fieldType1.setName("a");
MappedFieldType fieldType2 = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.INTEGER);
fieldType2.setName("b");
GlobalAggregationBuilder globalBuilder = new GlobalAggregationBuilder("global")
.subAggregation(new TTestAggregationBuilder("t_test")
.a(new MultiValuesSourceFieldConfig.Builder().setFieldName("a").build())
.b(new MultiValuesSourceFieldConfig.Builder().setFieldName("b").build())
.testType(TTestType.PAIRED));
testCase(globalBuilder, new MatchAllDocsQuery(), iw -> {
iw.addDocument(asList(new NumericDocValuesField("a", 102), new NumericDocValuesField("b", 89)));
iw.addDocument(asList(new NumericDocValuesField("a", 99), new NumericDocValuesField("b", 93)));
iw.addDocument(asList(new NumericDocValuesField("a", 111), new NumericDocValuesField("b", 72)));
}, (Consumer<InternalGlobal>) global -> {
assertEquals(3, global.getDocCount());
assertTrue(AggregationInspectionHelper.hasValue(global));
assertNotNull(global.getAggregations().asMap().get("t_test"));
InternalTTest tTest = (InternalTTest) global.getAggregations().asMap().get("t_test");
assertEquals(tTest, global.getProperty("t_test"));
assertEquals(0.1939778614, (Double) global.getProperty("t_test.value"), 0.000001);
}, fieldType1, fieldType2);
}
public void testScript() throws IOException {
boolean fieldInA = randomBoolean();
TTestType tTestType = randomFrom(TTestType.values());
MappedFieldType fieldType = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.INTEGER);
fieldType.setName("field");
MultiValuesSourceFieldConfig a = new MultiValuesSourceFieldConfig.Builder().setFieldName("field").build();
MultiValuesSourceFieldConfig b = new MultiValuesSourceFieldConfig.Builder().setScript(
new Script(ScriptType.INLINE, MockScriptEngine.NAME, ADD_HALF_SCRIPT, Collections.singletonMap("fieldname", "field"))).build();
TTestAggregationBuilder aggregationBuilder = new TTestAggregationBuilder("t_test").
a(fieldInA ? a : b).b(fieldInA ? b : a).testType(tTestType);
testCase(aggregationBuilder, new MatchAllDocsQuery(), iw -> {
iw.addDocument(singleton(new NumericDocValuesField("field", 1)));
iw.addDocument(singleton(new NumericDocValuesField("field", 2)));
iw.addDocument(singleton(new NumericDocValuesField("field", 3)));
}, (Consumer<InternalTTest>) tTest -> {
assertEquals(tTestType == TTestType.PAIRED ? 0 : 0.5733922538, tTest.getValue(), 0.000001);
}, fieldType);
}
public void testPaired() throws IOException {
MappedFieldType fieldType1 = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.INTEGER);
fieldType1.setName("a");
MappedFieldType fieldType2 = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.INTEGER);
fieldType2.setName("b");
TTestAggregationBuilder aggregationBuilder = new TTestAggregationBuilder("t_test")
.a(new MultiValuesSourceFieldConfig.Builder().setFieldName("a").build())
.b(new MultiValuesSourceFieldConfig.Builder().setFieldName("b").build())
.testType(TTestType.PAIRED);
int tails = randomIntBetween(1, 2);
if (tails == 1 || randomBoolean()) {
aggregationBuilder.tails(tails);
}
testCase(aggregationBuilder, new MatchAllDocsQuery(), iw -> {
iw.addDocument(asList(new NumericDocValuesField("a", 102), new NumericDocValuesField("b", 89)));
iw.addDocument(asList(new NumericDocValuesField("a", 99), new NumericDocValuesField("b", 93)));
iw.addDocument(asList(new NumericDocValuesField("a", 111), new NumericDocValuesField("b", 72)));
iw.addDocument(asList(new NumericDocValuesField("a", 97), new NumericDocValuesField("b", 98)));
iw.addDocument(asList(new NumericDocValuesField("a", 101), new NumericDocValuesField("b", 102)));
iw.addDocument(asList(new NumericDocValuesField("a", 99), new NumericDocValuesField("b", 98)));
}, (Consumer<InternalTTest>) ttest -> {
assertEquals(0.09571844217 * tails, ttest.getValue(), 0.00001);
}, fieldType1, fieldType2);
}
public void testHomoscedastic() throws IOException {
MappedFieldType fieldType1 = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.INTEGER);
fieldType1.setName("a");
MappedFieldType fieldType2 = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.INTEGER);
fieldType2.setName("b");
TTestAggregationBuilder aggregationBuilder = new TTestAggregationBuilder("t_test")
.a(new MultiValuesSourceFieldConfig.Builder().setFieldName("a").build())
.b(new MultiValuesSourceFieldConfig.Builder().setFieldName("b").build())
.testType(TTestType.HOMOSCEDASTIC);
int tails = randomIntBetween(1, 2);
if (tails == 1 || randomBoolean()) {
aggregationBuilder.tails(tails);
}
testCase(aggregationBuilder, new MatchAllDocsQuery(), iw -> {
iw.addDocument(asList(new NumericDocValuesField("a", 102), new NumericDocValuesField("b", 89)));
iw.addDocument(asList(new NumericDocValuesField("a", 99), new NumericDocValuesField("b", 93)));
iw.addDocument(asList(new NumericDocValuesField("a", 111), new NumericDocValuesField("b", 72)));
iw.addDocument(asList(new NumericDocValuesField("a", 97), new NumericDocValuesField("b", 98)));
iw.addDocument(asList(new NumericDocValuesField("a", 101), new NumericDocValuesField("b", 102)));
iw.addDocument(asList(new NumericDocValuesField("a", 99), new NumericDocValuesField("b", 98)));
}, (Consumer<InternalTTest>) ttest -> {
assertEquals(0.03928288693 * tails, ttest.getValue(), 0.00001);
}, fieldType1, fieldType2);
}
public void testHeteroscedastic() throws IOException {
MappedFieldType fieldType1 = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.INTEGER);
fieldType1.setName("a");
MappedFieldType fieldType2 = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.INTEGER);
fieldType2.setName("b");
TTestAggregationBuilder aggregationBuilder = new TTestAggregationBuilder("t_test")
.a(new MultiValuesSourceFieldConfig.Builder().setFieldName("a").build())
.b(new MultiValuesSourceFieldConfig.Builder().setFieldName("b").build());
if (randomBoolean()) {
aggregationBuilder.testType(TTestType.HETEROSCEDASTIC);
}
int tails = randomIntBetween(1, 2);
if (tails == 1 || randomBoolean()) {
aggregationBuilder.tails(tails);
}
testCase(aggregationBuilder, new MatchAllDocsQuery(), iw -> {
iw.addDocument(asList(new NumericDocValuesField("a", 102), new NumericDocValuesField("b", 89)));
iw.addDocument(asList(new NumericDocValuesField("a", 99), new NumericDocValuesField("b", 93)));
iw.addDocument(asList(new NumericDocValuesField("a", 111), new NumericDocValuesField("b", 72)));
iw.addDocument(asList(new NumericDocValuesField("a", 97), new NumericDocValuesField("b", 98)));
iw.addDocument(asList(new NumericDocValuesField("a", 101), new NumericDocValuesField("b", 102)));
iw.addDocument(asList(new NumericDocValuesField("a", 99), new NumericDocValuesField("b", 98)));
}, (Consumer<InternalTTest>) ttest -> {
assertEquals(0.04538666214 * tails, ttest.getValue(), 0.00001);
}, fieldType1, fieldType2);
}
private void testCase(Query query, TTestType type,
CheckedConsumer<RandomIndexWriter, IOException> buildIndex,
Consumer<InternalTTest> verify) throws IOException {
MappedFieldType fieldType1 = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.INTEGER);
fieldType1.setName("a");
MappedFieldType fieldType2 = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.INTEGER);
fieldType2.setName("b");
TTestAggregationBuilder aggregationBuilder = new TTestAggregationBuilder("t_test")
.a(new MultiValuesSourceFieldConfig.Builder().setFieldName("a").build())
.b(new MultiValuesSourceFieldConfig.Builder().setFieldName("b").build());
if (type != TTestType.HETEROSCEDASTIC || randomBoolean()) {
aggregationBuilder.testType(type);
}
testCase(aggregationBuilder, query, buildIndex, verify, fieldType1, fieldType2);
}
private <T extends AggregationBuilder, V extends InternalAggregation> void testCase(
T aggregationBuilder, Query query,
CheckedConsumer<RandomIndexWriter, IOException> buildIndex,
Consumer<V> verify, MappedFieldType... fieldType) throws IOException {
try (Directory directory = newDirectory()) {
RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);
buildIndex.accept(indexWriter);
indexWriter.close();
try (IndexReader indexReader = DirectoryReader.open(directory)) {
IndexSearcher indexSearcher = newSearcher(indexReader, true, true);
V agg = searchAndReduce(indexSearcher, query, aggregationBuilder, fieldType);
verify.accept(agg);
}
}
}
}

View File

@ -114,19 +114,22 @@ public class AnalyticsStatsAction extends ActionType<AnalyticsStatsAction.Respon
static final ParseField CUMULATIVE_CARDINALITY_USAGE = new ParseField("cumulative_cardinality_usage");
static final ParseField STRING_STATS_USAGE = new ParseField("string_stats_usage");
static final ParseField TOP_METRICS_USAGE = new ParseField("top_metrics_usage");
static final ParseField T_TEST_USAGE = new ParseField("t_test_usage");
private final long boxplotUsage;
private final long cumulativeCardinalityUsage;
private final long stringStatsUsage;
private final long topMetricsUsage;
private final long ttestUsage;
public NodeResponse(DiscoveryNode node, long boxplotUsage, long cumulativeCardinalityUsage, long stringStatsUsage,
long topMetricsUsage, long ttestUsage) {
super(node);
this.boxplotUsage = boxplotUsage;
this.cumulativeCardinalityUsage = cumulativeCardinalityUsage;
this.stringStatsUsage = stringStatsUsage;
this.topMetricsUsage = topMetricsUsage;
this.ttestUsage = ttestUsage;
}
public NodeResponse(StreamInput in) throws IOException {
@ -144,6 +147,11 @@ public class AnalyticsStatsAction extends ActionType<AnalyticsStatsAction.Respon
topMetricsUsage = 0;
stringStatsUsage = 0;
}
if (in.getVersion().onOrAfter(Version.V_7_8_0)) {
ttestUsage = in.readVLong();
} else {
ttestUsage = 0;
}
}
@Override
@ -157,6 +165,9 @@ public class AnalyticsStatsAction extends ActionType<AnalyticsStatsAction.Respon
out.writeVLong(stringStatsUsage);
out.writeVLong(topMetricsUsage);
}
if (out.getVersion().onOrAfter(Version.V_7_8_0)) {
out.writeVLong(ttestUsage);
}
}
@Override
@ -166,6 +177,7 @@ public class AnalyticsStatsAction extends ActionType<AnalyticsStatsAction.Respon
builder.field(CUMULATIVE_CARDINALITY_USAGE.getPreferredName(), cumulativeCardinalityUsage);
builder.field(STRING_STATS_USAGE.getPreferredName(), stringStatsUsage);
builder.field(TOP_METRICS_USAGE.getPreferredName(), topMetricsUsage);
builder.field(T_TEST_USAGE.getPreferredName(), ttestUsage);
builder.endObject();
return builder;
}
@ -185,5 +197,9 @@ public class AnalyticsStatsAction extends ActionType<AnalyticsStatsAction.Respon
public long getTopMetricsUsage() {
return topMetricsUsage;
}
public long getTTestUsage() {
return ttestUsage;
}
}
}

View File

@ -78,7 +78,8 @@ public final class Aggregations {
"string_stats", // https://github.com/elastic/elasticsearch/issues/51925
"terms", // https://github.com/elastic/elasticsearch/issues/51073
"top_hits",
"top_metrics" // https://github.com/elastic/elasticsearch/issues/52236
"top_metrics", // https://github.com/elastic/elasticsearch/issues/52236
"t_test" // https://github.com/elastic/elasticsearch/issues/54503
);
private Aggregations() {}