Mirror of https://github.com/honeymoose/OpenSearch.git, synced 2025-03-24 17:09:48 +00:00
Adds a t_test metric aggregation that can perform paired and unpaired two-sample t-tests. Support for filters in the unpaired case is not included in this PR; it will be added in a follow-up PR. Relates to #53692
This commit is contained in:
parent
0049e9467b
commit
2794572a35
@@ -535,6 +535,41 @@ for (int i = 0; i < 100; i++) {
          {"load_time": "$value"}"""
}

// Used by t_test aggregations
buildRestTests.setups['node_upgrade'] = '''
  - do:
      indices.create:
        index: node_upgrade
        body:
          settings:
            number_of_shards: 1
            number_of_replicas: 1
          mappings:
            properties:
              name:
                type: keyword
              startup_time_before:
                type: long
              startup_time_after:
                type: long
  - do:
      bulk:
        index: node_upgrade
        refresh: true
        body: |
          {"index":{}}
          {"name": "A", "startup_time_before": 102, "startup_time_after": 89}
          {"index":{}}
          {"name": "B", "startup_time_before": 99, "startup_time_after": 93}
          {"index":{}}
          {"name": "C", "startup_time_before": 111, "startup_time_after": 72}
          {"index":{}}
          {"name": "D", "startup_time_before": 97, "startup_time_after": 98}
          {"index":{}}
          {"name": "E", "startup_time_before": 101, "startup_time_after": 102}
          {"index":{}}
          {"name": "F", "startup_time_before": 99, "startup_time_after": 98}'''

// Used by iprange agg
buildRestTests.setups['iprange'] = '''
  - do:
@@ -49,7 +49,7 @@ include::metrics/median-absolute-deviation-aggregation.asciidoc[]

include::metrics/boxplot-aggregation.asciidoc[]

include::metrics/t-test-aggregation.asciidoc[]
114 docs/reference/aggregations/metrics/t-test-aggregation.asciidoc (new file)
@@ -0,0 +1,114 @@
[role="xpack"]
[testenv="basic"]
[[search-aggregations-metrics-ttest-aggregation]]
=== TTest Aggregation

A `t_test` metrics aggregation that performs a statistical hypothesis test in which the test statistic follows a Student's t-distribution
under the null hypothesis on numeric values extracted from the aggregated documents or generated by provided scripts. In practice, this
will tell you whether the difference between two population means is statistically significant and did not occur by chance alone.

==== Syntax

A `t_test` aggregation looks like this in isolation:

[source,js]
--------------------------------------------------
{
  "t_test": {
    "a": "value_before",
    "b": "value_after",
    "type": "paired"
  }
}
--------------------------------------------------
// NOTCONSOLE

Assuming that we have a record of node startup times before and after an upgrade, let's use a t-test to see whether the upgrade affected
the node startup time in a meaningful way.

[source,console]
--------------------------------------------------
GET node_upgrade/_search
{
  "size": 0,
  "aggs" : {
    "startup_time_ttest" : {
      "t_test" : {
        "a" : {"field": "startup_time_before"}, <1>
        "b" : {"field": "startup_time_after"},  <2>
        "type": "paired"                        <3>
      }
    }
  }
}
--------------------------------------------------
// TEST[setup:node_upgrade]
<1> The field `startup_time_before` must be a numeric field.
<2> The field `startup_time_after` must be a numeric field.
<3> Since we have data from the same nodes, we use a paired t-test.

The response will return the p-value, or probability value, for the test. It is the probability of obtaining results at least as extreme as
the result processed by the aggregation, assuming that the null hypothesis is correct (which means there is no difference between the
population means). A smaller p-value means the null hypothesis is more likely to be incorrect and the population means are indeed different.
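Concretely, the paired case reduces to the textbook computation over per-document differences; a sketch in standard notation (here `n` is the number of paired observations, and `tails`, which defaults to 2, multiplies the one-sided tail probability):

[source,latex]
--------------------------------------------------
d_i = a_i - b_i, \qquad
\bar{d} = \frac{1}{n}\sum_{i=1}^{n} d_i, \qquad
s_d = \sqrt{\frac{1}{n-1}\sum_{i=1}^{n}\left(d_i - \bar{d}\right)^2}
% t-statistic and p-value with n-1 degrees of freedom:
t = \frac{\bar{d}}{s_d/\sqrt{n}}, \qquad
p = \text{tails} \cdot P\!\left(T_{n-1} \le -\lvert t \rvert\right)
--------------------------------------------------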

[source,console-result]
--------------------------------------------------
{
  ...
  "aggregations": {
    "startup_time_ttest": {
      "value": 0.1914368843365979 <1>
    }
  }
}
--------------------------------------------------
// TESTRESPONSE[s/\.\.\./"took": $body.took,"timed_out": false,"_shards": $body._shards,"hits": $body.hits,/]
<1> The p-value.

==== T-Test Types

The `t_test` aggregation supports unpaired and paired two-sample t-tests. The type of the test can be specified using the `type` parameter:

`"type": "paired"`:: performs a paired t-test
`"type": "homoscedastic"`:: performs a two-sample equal variance test
`"type": "heteroscedastic"`:: performs a two-sample unequal variance test (this is the default)
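For reference, the two unpaired statistics follow the standard textbook definitions (stated here for orientation; the PR's UnpairedTTestState implementation is not shown in this excerpt):

[source,latex]
--------------------------------------------------
% Homoscedastic (pooled, equal variance):
t = \frac{\bar{a} - \bar{b}}{s_p \sqrt{\tfrac{1}{n_a} + \tfrac{1}{n_b}}},
\qquad
s_p^2 = \frac{(n_a - 1)s_a^2 + (n_b - 1)s_b^2}{n_a + n_b - 2}
% Heteroscedastic (Welch, unequal variance):
t = \frac{\bar{a} - \bar{b}}{\sqrt{\tfrac{s_a^2}{n_a} + \tfrac{s_b^2}{n_b}}},
\qquad
\nu \approx \frac{\left(\tfrac{s_a^2}{n_a} + \tfrac{s_b^2}{n_b}\right)^2}
                 {\tfrac{(s_a^2/n_a)^2}{n_a - 1} + \tfrac{(s_b^2/n_b)^2}{n_b - 1}}
--------------------------------------------------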

==== Script

The `t_test` metric supports scripting. For example, if we need to adjust our startup times for the before values, we could use
a script to recalculate them on the fly:

[source,console]
--------------------------------------------------
GET node_upgrade/_search
{
  "size": 0,
  "aggs" : {
    "startup_time_ttest" : {
      "t_test" : {
        "a": {
          "script" : {
            "lang": "painless",
            "source": "doc['startup_time_before'].value - params.adjustment", <1>
            "params" : {
              "adjustment" : 10  <2>
            }
          }
        },
        "b": {
          "field": "startup_time_after" <3>
        },
        "type": "paired"
      }
    }
  }
}
--------------------------------------------------
// TEST[setup:node_upgrade]

<1> The `field` parameter is replaced with a `script` parameter, which uses the
script to generate the values the t-test is calculated on.
<2> Scripting supports parameterized input just like any other script.
<3> We can mix scripts and fields.

@@ -18,6 +18,8 @@ dependencies {
    compileOnly project(path: xpackModule('core'), configuration: 'default')
    testCompile project(path: xpackModule('core'), configuration: 'testArtifacts')

    compile 'org.apache.commons:commons-math3:3.2'
}

integTest.enabled = false
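The new commons-math3 dependency supplies the Student's t-distribution used to turn the t-statistic into a p-value. As a sanity check, a small standalone sketch (assumed to run against commons-math3 3.2; it mirrors the math in PairedTTestState.getValue below, using the node_upgrade sample data) reproduces the documented p-value:

    // Sketch: verify the documented p-value with commons-math3 directly.
    import org.apache.commons.math3.distribution.TDistribution;

    public class PairedTTestCheck {
        public static void main(String[] args) {
            double[] before = {102, 99, 111, 97, 101, 99};
            double[] after = {89, 93, 72, 98, 102, 98};
            int c = before.length;
            double sum = 0, sumSq = 0;
            for (int i = 0; i < c; i++) {
                double d = before[i] - after[i];  // per-node difference
                sum += d;
                sumSq += d * d;
            }
            double mean = sum / c;
            double variance = (sumSq - sum * sum / c) / c;          // population variance of diffs
            double stdErr = Math.sqrt(variance) / Math.sqrt(c - 1); // equals s_d / sqrt(c)
            double t = Math.abs(mean / stdErr);
            // Two-tailed p-value with c-1 degrees of freedom;
            // prints ~0.19143688..., matching the documented response above.
            System.out.println(2 * new TDistribution(c - 1).cumulativeProbability(-t));
        }
    }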
@@ -0,0 +1 @@
ec2544ab27e110d2d431bdad7d538ed509b21e62
475 x-pack/plugin/analytics/licenses/commons-math3-LICENSE.txt (new file)
@@ -0,0 +1,475 @@
[Apache License, Version 2.0, January 2004 (http://www.apache.org/licenses/), full text, followed by the third-party notices bundled in this license file for code derived from Unicode, Inc., Python 2.4.2/3.1.2, the Brics automaton package, the moman/finenight FSA package, ICU, the Snowball stemmers, KStemmer, Morfologik, and the Morfeusz/SGJP dictionary.]
@@ -0,0 +1,9 @@
Apache Commons Math
Copyright 2001-2020 The Apache Software Foundation

This product includes software developed at
The Apache Software Foundation (http://www.apache.org/).

This product includes software developed for Orekit by
CS Systèmes d'Information (http://www.c-s.fr/)
Copyright 2010-2012 CS Systèmes d'Information
@@ -39,6 +39,11 @@ import org.elasticsearch.xpack.analytics.stringstats.StringStatsAggregationBuild
import org.elasticsearch.xpack.analytics.topmetrics.InternalTopMetrics;
import org.elasticsearch.xpack.analytics.topmetrics.TopMetricsAggregationBuilder;
import org.elasticsearch.xpack.analytics.topmetrics.TopMetricsAggregatorFactory;
import org.elasticsearch.xpack.analytics.ttest.InternalTTest;
import org.elasticsearch.xpack.analytics.ttest.PairedTTestState;
import org.elasticsearch.xpack.analytics.ttest.TTestAggregationBuilder;
import org.elasticsearch.xpack.analytics.ttest.TTestState;
import org.elasticsearch.xpack.analytics.ttest.UnpairedTTestState;
import org.elasticsearch.xpack.core.XPackField;
import org.elasticsearch.xpack.core.XPackPlugin;
import org.elasticsearch.xpack.core.analytics.action.AnalyticsStatsAction;

@@ -91,7 +96,12 @@ public class AnalyticsPlugin extends Plugin implements SearchPlugin, ActionPlugi
             TopMetricsAggregationBuilder.NAME,
             TopMetricsAggregationBuilder::new,
             usage.track(AnalyticsUsage.Item.TOP_METRICS, checkLicense(TopMetricsAggregationBuilder.PARSER)))
-            .addResultReader(InternalTopMetrics::new)
+            .addResultReader(InternalTopMetrics::new),
+        new AggregationSpec(
+            TTestAggregationBuilder.NAME,
+            TTestAggregationBuilder::new,
+            usage.track(AnalyticsUsage.Item.T_TEST, checkLicense(TTestAggregationBuilder.PARSER)))
+            .addResultReader(InternalTTest::new)
         );
     }
@@ -131,6 +141,14 @@ public class AnalyticsPlugin extends Plugin implements SearchPlugin, ActionPlugi
         return singletonList(new AnalyticsUsage());
     }

+    @Override
+    public List<NamedWriteableRegistry.Entry> getNamedWriteables() {
+        return Arrays.asList(
+            new NamedWriteableRegistry.Entry(TTestState.class, PairedTTestState.NAME, PairedTTestState::new),
+            new NamedWriteableRegistry.Entry(TTestState.class, UnpairedTTestState.NAME, UnpairedTTestState::new)
+        );
+    }
+
     private static <T> ContextParser<String, T> checkLicense(ContextParser<String, T> realParser) {
         return (parser, name) -> {
             if (getLicenseState().isAnalyticsAllowed() == false) {
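The two TTestState implementations are registered as named writeables because the coordinating node deserializes the polymorphic shard-level state by its NAME during reduce. A minimal round-trip sketch (test-style wiring; the TTestStats(count, sum, sumOfSqrs) constructor shape is assumed from its usages later in this PR):

    // Register TTestState -> PairedTTestState under its NAME ("P"), as the plugin does.
    static TTestState roundTrip() throws IOException {
        NamedWriteableRegistry registry = new NamedWriteableRegistry(Arrays.asList(
            new NamedWriteableRegistry.Entry(TTestState.class, PairedTTestState.NAME, PairedTTestState::new)));
        // Serialize a state, then read it back polymorphically by name.
        BytesStreamOutput out = new BytesStreamOutput();
        out.writeNamedWriteable(new PairedTTestState(new TTestStats(3, 6.0, 14.0), 2));
        StreamInput in = new NamedWriteableAwareStreamInput(out.bytes().streamInput(), registry);
        return in.readNamedWriteable(TTestState.class);  // comes back as a PairedTTestState
    }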
@@ -25,7 +25,8 @@ public class AnalyticsUsage {
         BOXPLOT,
         CUMULATIVE_CARDINALITY,
         STRING_STATS,
-        TOP_METRICS;
+        TOP_METRICS,
+        T_TEST;
     }

     private final Map<Item, AtomicLong> trackers = new EnumMap<>(Item.class);

@@ -54,6 +55,7 @@ public class AnalyticsUsage {
             trackers.get(Item.BOXPLOT).get(),
             trackers.get(Item.CUMULATIVE_CARDINALITY).get(),
             trackers.get(Item.STRING_STATS).get(),
-            trackers.get(Item.TOP_METRICS).get());
+            trackers.get(Item.TOP_METRICS).get(),
+            trackers.get(Item.T_TEST).get());
     }
 }
@@ -0,0 +1,99 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License;
 * you may not use this file except in compliance with the Elastic License.
 */

package org.elasticsearch.xpack.analytics.ttest;

import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.search.DocValueFormat;
import org.elasticsearch.search.aggregations.InternalAggregation;
import org.elasticsearch.search.aggregations.metrics.InternalNumericMetricsAggregation;

import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.Objects;

public class InternalTTest extends InternalNumericMetricsAggregation.SingleValue implements TTest {

    protected final TTestState state;

    InternalTTest(String name, TTestState state, DocValueFormat formatter, Map<String, Object> metadata) {
        super(name, metadata);
        this.state = state;
        this.format = formatter;
    }

    /**
     * Read from a stream.
     */
    public InternalTTest(StreamInput in) throws IOException {
        super(in);
        format = in.readNamedWriteable(DocValueFormat.class);
        state = in.readNamedWriteable(TTestState.class);
    }

    @Override
    protected void doWriteTo(StreamOutput out) throws IOException {
        out.writeNamedWriteable(format);
        out.writeNamedWriteable(state);
    }

    @Override
    public String getWriteableName() {
        return TTestAggregationBuilder.NAME;
    }

    // for testing only
    DocValueFormat format() {
        return format;
    }

    @Override
    public InternalTTest reduce(List<InternalAggregation> aggregations, ReduceContext reduceContext) {
        TTestState reduced = state.reduce(aggregations.stream().map(a -> ((InternalTTest) a).state));
        return new InternalTTest(name, reduced, format, getMetadata());
    }

    @Override
    public XContentBuilder doXContentBody(XContentBuilder builder, Params params) throws IOException {
        double value = state.getValue();
        boolean hasValue = Double.isNaN(value) == false;
        builder.field(CommonFields.VALUE.getPreferredName(), hasValue ? value : null);
        if (hasValue && format != DocValueFormat.RAW) {
            builder.field(CommonFields.VALUE_AS_STRING.getPreferredName(), format.format(value).toString());
        }
        return builder;
    }

    @Override
    public int hashCode() {
        return Objects.hash(super.hashCode(), state);
    }

    @Override
    public boolean equals(Object obj) {
        if (this == obj) return true;
        if (obj == null || getClass() != obj.getClass()) return false;
        if (super.equals(obj) == false) return false;

        InternalTTest that = (InternalTTest) obj;
        return Objects.equals(state, that.state);
    }

    @Override
    public double value() {
        return state.getValue();
    }

    @Override
    public double getValue() {
        return state.getValue();
    }
}
@@ -0,0 +1,91 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License;
 * you may not use this file except in compliance with the Elastic License.
 */

package org.elasticsearch.xpack.analytics.ttest;

import org.apache.lucene.index.LeafReaderContext;
import org.elasticsearch.common.lease.Releasables;
import org.elasticsearch.common.util.BigArrays;
import org.elasticsearch.index.fielddata.SortedNumericDoubleValues;
import org.elasticsearch.search.DocValueFormat;
import org.elasticsearch.search.aggregations.AggregationExecutionException;
import org.elasticsearch.search.aggregations.Aggregator;
import org.elasticsearch.search.aggregations.LeafBucketCollector;
import org.elasticsearch.search.aggregations.LeafBucketCollectorBase;
import org.elasticsearch.search.aggregations.metrics.CompensatedSum;
import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator;
import org.elasticsearch.search.aggregations.support.MultiValuesSource;
import org.elasticsearch.search.internal.SearchContext;

import java.io.IOException;
import java.util.List;
import java.util.Map;

import static org.elasticsearch.xpack.analytics.ttest.TTestAggregationBuilder.A_FIELD;
import static org.elasticsearch.xpack.analytics.ttest.TTestAggregationBuilder.B_FIELD;

public class PairedTTestAggregator extends TTestAggregator<PairedTTestState> {
    private TTestStatsBuilder statsBuilder;

    PairedTTestAggregator(String name, MultiValuesSource.NumericMultiValuesSource valuesSources, int tails, DocValueFormat format,
                          SearchContext context, Aggregator parent, List<PipelineAggregator> pipelineAggregators,
                          Map<String, Object> metadata) throws IOException {
        super(name, valuesSources, tails, format, context, parent, pipelineAggregators, metadata);
        statsBuilder = new TTestStatsBuilder(context.bigArrays());
    }

    @Override
    protected PairedTTestState getState(long bucket) {
        return new PairedTTestState(statsBuilder.get(bucket), tails);
    }

    @Override
    protected PairedTTestState getEmptyState() {
        return new PairedTTestState(new TTestStats(0, 0, 0), tails);
    }

    @Override
    protected long size() {
        return statsBuilder.getSize();
    }

    @Override
    public LeafBucketCollector getLeafCollector(LeafReaderContext ctx,
                                                final LeafBucketCollector sub) throws IOException {
        if (valuesSources == null) {
            return LeafBucketCollector.NO_OP_COLLECTOR;
        }
        final BigArrays bigArrays = context.bigArrays();
        final SortedNumericDoubleValues docAValues = valuesSources.getField(A_FIELD.getPreferredName(), ctx);
        final SortedNumericDoubleValues docBValues = valuesSources.getField(B_FIELD.getPreferredName(), ctx);
        final CompensatedSum compDiffSum = new CompensatedSum(0, 0);
        final CompensatedSum compDiffSumOfSqr = new CompensatedSum(0, 0);

        return new LeafBucketCollectorBase(sub, docAValues) {
            @Override
            public void collect(int doc, long bucket) throws IOException {
                if (docAValues.advanceExact(doc) && docBValues.advanceExact(doc)) {
                    if (docAValues.docValueCount() > 1 || docBValues.docValueCount() > 1) {
                        throw new AggregationExecutionException("Encountered more than one value for a " +
                            "single document. Use a script to combine multiple values per doc into a single value.");
                    }
                    statsBuilder.grow(bigArrays, bucket + 1);
                    // There should always be one value if advanceExact lands us here, either
                    // a real value or a `missing` value
                    assert docAValues.docValueCount() == 1;
                    assert docBValues.docValueCount() == 1;
                    double diff = docAValues.nextValue() - docBValues.nextValue();
                    statsBuilder.addValue(compDiffSum, compDiffSumOfSqr, bucket, diff);
                }
            }
        };
    }

    @Override
    public void doClose() {
        Releasables.close(statsBuilder);
    }
}
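The collector accumulates the sum and sum-of-squares of the per-document differences through CompensatedSum, Elasticsearch's Kahan-style compensated accumulator, to limit floating-point error over many documents. A minimal self-contained sketch of the idea (plain Java, independent of the Elasticsearch class):

    // Minimal Kahan (compensated) summation sketch: `comp` carries the low-order
    // bits that a naive `sum += value` would discard.
    final class KahanSum {
        private double sum;   // running total
        private double comp;  // running compensation for lost low-order bits

        void add(double value) {
            double corrected = value - comp;    // apply previous compensation
            double newSum = sum + corrected;    // big + small: low bits may be lost here
            comp = (newSum - sum) - corrected;  // recover what was just lost
            sum = newSum;
        }

        double value() {
            return sum;
        }
    }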
@@ -0,0 +1,91 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License;
 * you may not use this file except in compliance with the Elastic License.
 */

package org.elasticsearch.xpack.analytics.ttest;

import org.apache.commons.math3.distribution.TDistribution;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;

import java.io.IOException;
import java.util.Objects;
import java.util.stream.Stream;

public class PairedTTestState implements TTestState {

    public static final String NAME = "P";

    private final TTestStats stats;

    private final int tails;

    public PairedTTestState(TTestStats stats, int tails) {
        this.stats = stats;
        this.tails = tails;
    }

    public PairedTTestState(StreamInput in) throws IOException {
        stats = new TTestStats(in);
        tails = in.readVInt();
    }

    @Override
    public void writeTo(StreamOutput out) throws IOException {
        stats.writeTo(out);
        out.writeVInt(tails);
    }

    @Override
    public double getValue() {
        if (stats.count < 2) {
            return Double.NaN;
        }
        long n = stats.count - 1;
        double meanDiff = stats.sum / stats.count;
        double variance = (stats.sumOfSqrs - ((stats.sum * stats.sum) / stats.count)) / stats.count;
        if (variance <= 0.0) {
            return meanDiff == 0.0 ? Double.NaN : 0.0;
        }
        double stdDiv = Math.sqrt(variance);
        double stdErr = stdDiv / Math.sqrt(n);
        double t = Math.abs(meanDiff / stdErr);
        TDistribution dist = new TDistribution(n);
        return dist.cumulativeProbability(-t) * tails;
    }

    @Override
    public TTestState reduce(Stream<TTestState> states) {
        TTestStats.Reducer reducer = new TTestStats.Reducer();
        states.forEach(tTestState -> {
            PairedTTestState state = (PairedTTestState) tTestState;
            reducer.accept(state.stats);
            if (state.tails != tails) {
                throw new IllegalStateException("Incompatible tails value in the reduce. Expected "
                    + state.tails + " reduced with " + tails);
            }
        });
        return new PairedTTestState(reducer.result(), tails);
    }

    @Override
    public String getWriteableName() {
        return NAME;
    }

    @Override
    public boolean equals(Object o) {
        if (this == o) return true;
        if (o == null || getClass() != o.getClass()) return false;
        PairedTTestState that = (PairedTTestState) o;
        return tails == that.tails &&
            stats.equals(that.stats);
    }

    @Override
    public int hashCode() {
        return Objects.hash(stats, tails);
    }
}
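getValue computes the paired-t statistic from streaming moments rather than stored samples. With c = stats.count, S = stats.sum, and Q = stats.sumOfSqrs, the code's population-variance-over-(c-1) standard error is algebraically the textbook s_d/sqrt(c), and the p-value uses c-1 degrees of freedom with cumulativeProbability(-|t|) scaled by `tails`:

    % Code: variance = (Q - S^2/c)/c;  stdErr = sqrt(variance)/sqrt(c-1)
    \text{stdErr} = \sqrt{\frac{Q - S^2/c}{c\,(c-1)}}
                  = \frac{s_d}{\sqrt{c}},
    \qquad
    s_d^2 = \frac{Q - S^2/c}{c-1}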
@@ -0,0 +1,15 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License;
 * you may not use this file except in compliance with the Elastic License.
 */

package org.elasticsearch.xpack.analytics.ttest;

import org.elasticsearch.search.aggregations.metrics.NumericMetricsAggregation;

public interface TTest extends NumericMetricsAggregation.SingleValue {

    double getValue();

}
@@ -0,0 +1,135 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License;
 * you may not use this file except in compliance with the Elastic License.
 */

package org.elasticsearch.xpack.analytics.ttest;

import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.xcontent.ObjectParser;
import org.elasticsearch.common.xcontent.ToXContent;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.index.query.QueryShardContext;
import org.elasticsearch.search.DocValueFormat;
import org.elasticsearch.search.aggregations.AggregationBuilder;
import org.elasticsearch.search.aggregations.AggregatorFactories;
import org.elasticsearch.search.aggregations.AggregatorFactory;
import org.elasticsearch.search.aggregations.support.MultiValuesSourceAggregationBuilder;
import org.elasticsearch.search.aggregations.support.MultiValuesSourceAggregatorFactory;
import org.elasticsearch.search.aggregations.support.MultiValuesSourceFieldConfig;
import org.elasticsearch.search.aggregations.support.MultiValuesSourceParseHelper;
import org.elasticsearch.search.aggregations.support.ValueType;
import org.elasticsearch.search.aggregations.support.ValuesSource;
import org.elasticsearch.search.aggregations.support.ValuesSourceConfig;

import java.io.IOException;
import java.util.Map;
import java.util.Objects;

public class TTestAggregationBuilder extends MultiValuesSourceAggregationBuilder.LeafOnly<ValuesSource.Numeric, TTestAggregationBuilder> {
    public static final String NAME = "t_test";
    public static final ParseField A_FIELD = new ParseField("a");
    public static final ParseField B_FIELD = new ParseField("b");
    public static final ParseField TYPE_FIELD = new ParseField("type");
    public static final ParseField TAILS_FIELD = new ParseField("tails");

    public static final ObjectParser<TTestAggregationBuilder, String> PARSER =
        ObjectParser.fromBuilder(NAME, TTestAggregationBuilder::new);

    static {
        MultiValuesSourceParseHelper.declareCommon(PARSER, true, ValueType.NUMERIC);
        MultiValuesSourceParseHelper.declareField(A_FIELD.getPreferredName(), PARSER, true, false);
        MultiValuesSourceParseHelper.declareField(B_FIELD.getPreferredName(), PARSER, true, false);
        PARSER.declareString(TTestAggregationBuilder::testType, TYPE_FIELD);
        PARSER.declareInt(TTestAggregationBuilder::tails, TAILS_FIELD);
    }

    private TTestType testType = TTestType.HETEROSCEDASTIC;

    private int tails = 2;

    public TTestAggregationBuilder(String name) {
        super(name, ValueType.NUMERIC);
    }

    public TTestAggregationBuilder(TTestAggregationBuilder clone,
                                   AggregatorFactories.Builder factoriesBuilder,
                                   Map<String, Object> metadata) {
        super(clone, factoriesBuilder, metadata);
    }

    public TTestAggregationBuilder a(MultiValuesSourceFieldConfig valueConfig) {
        field(A_FIELD.getPreferredName(), Objects.requireNonNull(valueConfig, "Configuration for field [" + A_FIELD + "] cannot be null"));
        return this;
    }

    public TTestAggregationBuilder b(MultiValuesSourceFieldConfig weightConfig) {
        field(B_FIELD.getPreferredName(), Objects.requireNonNull(weightConfig, "Configuration for field [" + B_FIELD + "] cannot be null"));
        return this;
    }

    public TTestAggregationBuilder testType(String testType) {
        return testType(TTestType.resolve(Objects.requireNonNull(testType, "Test type cannot be null")));
    }

    public TTestAggregationBuilder testType(TTestType testType) {
        this.testType = Objects.requireNonNull(testType, "Test type cannot be null");
        return this;
    }

    public TTestAggregationBuilder tails(int tails) {
        if (tails < 1 || tails > 2) {
            throw new IllegalArgumentException(
                "[tails] must be 1 or 2. Found [" + tails + "] in [" + name + "]");
        }
        this.tails = tails;
        return this;
    }

    public TTestAggregationBuilder(StreamInput in) throws IOException {
        super(in, ValueType.NUMERIC);
        testType = in.readEnum(TTestType.class);
        tails = in.readVInt();
    }

    @Override
    protected AggregationBuilder shallowCopy(AggregatorFactories.Builder factoriesBuilder, Map<String, Object> metadata) {
        return new TTestAggregationBuilder(this, factoriesBuilder, metadata);
    }

    @Override
    public BucketCardinality bucketCardinality() {
        return BucketCardinality.NONE;
    }

    @Override
    protected void innerWriteTo(StreamOutput out) throws IOException {
        out.writeEnum(testType);
        out.writeVInt(tails);
    }

    @Override
    protected MultiValuesSourceAggregatorFactory<ValuesSource.Numeric> innerBuild(
            QueryShardContext queryShardContext,
            Map<String, ValuesSourceConfig<ValuesSource.Numeric>> configs,
            DocValueFormat format,
            AggregatorFactory parent,
            AggregatorFactories.Builder subFactoriesBuilder) throws IOException {
        return new TTestAggregatorFactory(name, configs, testType, tails, format, queryShardContext, parent, subFactoriesBuilder, metadata);
    }

    @Override
    public XContentBuilder doXContentBody(XContentBuilder builder, ToXContent.Params params) throws IOException {
        return builder;
    }

    @Override
    public String getType() {
        return NAME;
    }
}
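For orientation, a hypothetical client-side sketch (assumed wiring, not part of this change) showing how the builder assembles the same paired t-test request as the docs example:

    // Inside a test or transport-client method:
    SearchSourceBuilder source = new SearchSourceBuilder()
        .size(0)
        .aggregation(new TTestAggregationBuilder("startup_time_ttest")
            .a(new MultiValuesSourceFieldConfig.Builder().setFieldName("startup_time_before").build())
            .b(new MultiValuesSourceFieldConfig.Builder().setFieldName("startup_time_after").build())
            .testType(TTestType.PAIRED)  // enum constant used by TTestAggregatorFactory's switch
            .tails(2));                  // two-tailed, the default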
@@ -0,0 +1,67 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License;
 * you may not use this file except in compliance with the Elastic License.
 */

package org.elasticsearch.xpack.analytics.ttest;

import org.apache.lucene.search.ScoreMode;
import org.elasticsearch.search.DocValueFormat;
import org.elasticsearch.search.aggregations.Aggregator;
import org.elasticsearch.search.aggregations.InternalAggregation;
import org.elasticsearch.search.aggregations.metrics.NumericMetricsAggregator;
import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator;
import org.elasticsearch.search.aggregations.support.MultiValuesSource;
import org.elasticsearch.search.internal.SearchContext;

import java.io.IOException;
import java.util.List;
import java.util.Map;


public abstract class TTestAggregator<T extends TTestState> extends NumericMetricsAggregator.SingleValue {

    protected final MultiValuesSource.NumericMultiValuesSource valuesSources;
    protected final int tails;

    private DocValueFormat format;

    TTestAggregator(String name, MultiValuesSource.NumericMultiValuesSource valuesSources, int tails, DocValueFormat format,
                    SearchContext context, Aggregator parent,
                    List<PipelineAggregator> pipelineAggregators, Map<String, Object> metadata) throws IOException {
        super(name, context, parent, pipelineAggregators, metadata);
        this.valuesSources = valuesSources;
        this.tails = tails;
        this.format = format;
    }

    @Override
    public ScoreMode scoreMode() {
        return valuesSources != null && valuesSources.needsScores() ? ScoreMode.COMPLETE : ScoreMode.COMPLETE_NO_SCORES;
    }

    protected abstract T getState(long bucket);

    protected abstract T getEmptyState();

    protected abstract long size();

    @Override
    public InternalAggregation buildAggregation(long bucket) {
        if (valuesSources == null || bucket >= size()) {
            return buildEmptyAggregation();
        }
        return new InternalTTest(name, getState(bucket), format, metadata());
    }

    @Override
    public InternalAggregation buildEmptyAggregation() {
        return new InternalTTest(name, getEmptyState(), format, metadata());
    }

    @Override
    public double metric(long owningBucketOrd) {
        return getState(owningBucketOrd).getValue();
    }
}
@ -0,0 +1,82 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License;
 * you may not use this file except in compliance with the Elastic License.
 */

package org.elasticsearch.xpack.analytics.ttest;

import org.elasticsearch.index.query.QueryShardContext;
import org.elasticsearch.search.DocValueFormat;
import org.elasticsearch.search.aggregations.Aggregator;
import org.elasticsearch.search.aggregations.AggregatorFactories;
import org.elasticsearch.search.aggregations.AggregatorFactory;
import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator;
import org.elasticsearch.search.aggregations.support.MultiValuesSource;
import org.elasticsearch.search.aggregations.support.MultiValuesSourceAggregatorFactory;
import org.elasticsearch.search.aggregations.support.ValuesSource;
import org.elasticsearch.search.aggregations.support.ValuesSourceConfig;
import org.elasticsearch.search.internal.SearchContext;

import java.io.IOException;
import java.util.List;
import java.util.Map;

class TTestAggregatorFactory extends MultiValuesSourceAggregatorFactory<ValuesSource.Numeric> {

    private final TTestType testType;
    private final int tails;

    TTestAggregatorFactory(String name, Map<String, ValuesSourceConfig<ValuesSource.Numeric>> configs, TTestType testType, int tails,
                           DocValueFormat format, QueryShardContext queryShardContext, AggregatorFactory parent,
                           AggregatorFactories.Builder subFactoriesBuilder,
                           Map<String, Object> metadata) throws IOException {
        super(name, configs, format, queryShardContext, parent, subFactoriesBuilder, metadata);
        this.testType = testType;
        this.tails = tails;
    }

    @Override
    protected Aggregator createUnmapped(SearchContext searchContext,
                                        Aggregator parent,
                                        List<PipelineAggregator> pipelineAggregators,
                                        Map<String, Object> metadata) throws IOException {
        switch (testType) {
            case PAIRED:
                return new PairedTTestAggregator(name, null, tails, format, searchContext, parent, pipelineAggregators, metadata);
            case HOMOSCEDASTIC:
                return new UnpairedTTestAggregator(name, null, tails, true, format, searchContext, parent, pipelineAggregators, metadata);
            case HETEROSCEDASTIC:
                return new UnpairedTTestAggregator(name, null, tails, false, format, searchContext, parent, pipelineAggregators, metadata);
            default:
                throw new IllegalArgumentException("Unsupported t-test type " + testType);
        }
    }

    @Override
    protected Aggregator doCreateInternal(SearchContext searchContext,
                                          Map<String, ValuesSourceConfig<ValuesSource.Numeric>> configs,
                                          DocValueFormat format,
                                          Aggregator parent,
                                          boolean collectsFromSingleBucket,
                                          List<PipelineAggregator> pipelineAggregators,
                                          Map<String, Object> metadata) throws IOException {
        MultiValuesSource.NumericMultiValuesSource numericMultiVS
            = new MultiValuesSource.NumericMultiValuesSource(configs, queryShardContext);
        if (numericMultiVS.areValuesSourcesEmpty()) {
            return createUnmapped(searchContext, parent, pipelineAggregators, metadata);
        }
        switch (testType) {
            case PAIRED:
                return new PairedTTestAggregator(name, numericMultiVS, tails, format, searchContext, parent, pipelineAggregators, metadata);
            case HOMOSCEDASTIC:
                return new UnpairedTTestAggregator(name, numericMultiVS, tails, true, format, searchContext, parent, pipelineAggregators,
                    metadata);
            case HETEROSCEDASTIC:
                return new UnpairedTTestAggregator(name, numericMultiVS, tails, false, format, searchContext, parent, pipelineAggregators,
                    metadata);
            default:
                throw new IllegalArgumentException("Unsupported t-test type " + testType);
        }
    }
}
@ -0,0 +1,20 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License;
 * you may not use this file except in compliance with the Elastic License.
 */

package org.elasticsearch.xpack.analytics.ttest;

import org.elasticsearch.common.io.stream.NamedWriteable;

import java.util.stream.Stream;

/**
 * Base interface for t-test aggregation state
 */
public interface TTestState extends NamedWriteable {
    double getValue();

    TTestState reduce(Stream<TTestState> states);
}
@ -0,0 +1,85 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License;
 * you may not use this file except in compliance with the Elastic License.
 */

package org.elasticsearch.xpack.analytics.ttest;

import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.io.stream.Writeable;
import org.elasticsearch.search.aggregations.metrics.CompensatedSum;

import java.io.IOException;
import java.util.Objects;
import java.util.function.Consumer;

/**
 * Collects the basic stats that are needed to perform a t-test
 */
public class TTestStats implements Writeable {
    public final long count;
    public final double sum;
    public final double sumOfSqrs;

    public TTestStats(long count, double sum, double sumOfSqrs) {
        this.count = count;
        this.sum = sum;
        this.sumOfSqrs = sumOfSqrs;
    }

    public TTestStats(StreamInput in) throws IOException {
        count = in.readVLong();
        sum = in.readDouble();
        sumOfSqrs = in.readDouble();
    }

    @Override
    public void writeTo(StreamOutput out) throws IOException {
        out.writeVLong(count);
        out.writeDouble(sum);
        out.writeDouble(sumOfSqrs);
    }

    public double variance() {
        double v = (sumOfSqrs - ((sum * sum) / count)) / (count - 1);
        return v < 0 ? 0 : v;
    }

    public double average() {
        return sum / count;
    }

    public static class Reducer implements Consumer<TTestStats> {
        private long count = 0;
        CompensatedSum compSum = new CompensatedSum(0, 0);
        CompensatedSum compSumOfSqrs = new CompensatedSum(0, 0);

        @Override
        public void accept(TTestStats stat) {
            count += stat.count;
            compSum.add(stat.sum);
            compSumOfSqrs.add(stat.sumOfSqrs);
        }

        public TTestStats result() {
            return new TTestStats(count, compSum.value(), compSumOfSqrs.value());
        }
    }

    @Override
    public boolean equals(Object o) {
        if (this == o) return true;
        if (o == null || getClass() != o.getClass()) return false;
        TTestStats that = (TTestStats) o;
        return count == that.count &&
            Double.compare(that.sum, sum) == 0 &&
            Double.compare(that.sumOfSqrs, sumOfSqrs) == 0;
    }

    @Override
    public int hashCode() {
        return Objects.hash(count, sum, sumOfSqrs);
    }
}
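For reference, `variance()` above is the one-pass "computational" form of the unbiased sample variance, with `count` as n, `sum` as the sum of the values and `sumOfSqrs` as the sum of their squares:

    s^2 = \frac{\sum_{i=1}^{n} x_i^2 - \left(\sum_{i=1}^{n} x_i\right)^2 / n}{n - 1}

The clamp to zero guards against the tiny negative results that floating-point cancellation can produce when all values are (nearly) identical.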
@ -0,0 +1,78 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License;
 * you may not use this file except in compliance with the Elastic License.
 */

package org.elasticsearch.xpack.analytics.ttest;

import org.elasticsearch.common.lease.Releasable;
import org.elasticsearch.common.lease.Releasables;
import org.elasticsearch.common.util.BigArrays;
import org.elasticsearch.common.util.DoubleArray;
import org.elasticsearch.common.util.LongArray;
import org.elasticsearch.search.aggregations.metrics.CompensatedSum;

public class TTestStatsBuilder implements Releasable {

    private LongArray counts;
    private DoubleArray sums;
    private DoubleArray compensations;
    private DoubleArray sumOfSqrs;
    private DoubleArray sumOfSqrCompensations;

    TTestStatsBuilder(BigArrays bigArrays) {
        counts = bigArrays.newLongArray(1, true);
        sums = bigArrays.newDoubleArray(1, true);
        compensations = bigArrays.newDoubleArray(1, true);
        sumOfSqrs = bigArrays.newDoubleArray(1, true);
        sumOfSqrCompensations = bigArrays.newDoubleArray(1, true);
    }

    public TTestStats get(long bucket) {
        return new TTestStats(counts.get(bucket), sums.get(bucket), sumOfSqrs.get(bucket));
    }

    public long build(long bucket) {
        return counts.get(bucket);
    }

    public long getSize() {
        return counts.size();
    }

    public void grow(BigArrays bigArrays, long buckets) {
        if (buckets >= counts.size()) {
            long overSize = BigArrays.overSize(buckets);
            counts = bigArrays.resize(counts, overSize);
            sums = bigArrays.resize(sums, overSize);
            compensations = bigArrays.resize(compensations, overSize);
            sumOfSqrs = bigArrays.resize(sumOfSqrs, overSize);
            sumOfSqrCompensations = bigArrays.resize(sumOfSqrCompensations, overSize);
        }
    }

    public void addValue(CompensatedSum compSum, CompensatedSum compSumOfSqr, long bucket, double val) {
        counts.increment(bucket, 1);
        double sum = sums.get(bucket);
        double compensation = compensations.get(bucket);
        compSum.reset(sum, compensation);

        double sumOfSqr = sumOfSqrs.get(bucket);
        double sumOfSqrCompensation = sumOfSqrCompensations.get(bucket);
        compSumOfSqr.reset(sumOfSqr, sumOfSqrCompensation);

        compSum.add(val);
        compSumOfSqr.add(val * val);

        sums.set(bucket, compSum.value());
        compensations.set(bucket, compSum.delta());
        sumOfSqrs.set(bucket, compSumOfSqr.value());
        sumOfSqrCompensations.set(bucket, compSumOfSqr.delta());
    }

    @Override
    public void close() {
        Releasables.close(counts, sums, compensations, sumOfSqrs, sumOfSqrCompensations);
    }
}
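Note that the builder persists both the running sums and their compensation terms (`delta`) per bucket, so precision is not lost when `addValue` resumes a bucket mid-stream. A minimal sketch of the compensated (Kahan) summation idea behind `CompensatedSum`, independent of any ES class:

    // "value" and "delta" together carry more precision than a plain double.
    double naive = 1.0;
    double value = 1.0;   // running sum
    double delta = 0.0;   // running compensation for lost low-order bits
    for (int i = 0; i < 100_000; i++) {
        naive += 1e-16;                          // rounds back to 1.0 every time
        double corrected = 1e-16 - delta;        // apply the previous correction
        double newValue = value + corrected;     // low-order bits may be lost here
        delta = (newValue - value) - corrected;  // recover what was lost
        value = newValue;
    }
    // naive is still exactly 1.0; value is close to the true 1.00000000001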
@ -0,0 +1,25 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License;
 * you may not use this file except in compliance with the Elastic License.
 */

package org.elasticsearch.xpack.analytics.ttest;

import java.util.Locale;

/**
 * T-test type: paired, unpaired with equal variance (homoscedastic), or unpaired with unequal variance (heteroscedastic)
 */
public enum TTestType {
    PAIRED, HOMOSCEDASTIC, HETEROSCEDASTIC;

    public static TTestType resolve(String name) {
        return TTestType.valueOf(name.toUpperCase(Locale.ROOT));
    }

    public String value() {
        return name().toLowerCase(Locale.ROOT);
    }

}
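Since `resolve` delegates to `valueOf` after upper-casing with `Locale.ROOT`, parsing is case-insensitive and locale-safe, and `value()` is its inverse, producing the lower-case form used in requests. A quick illustration:

    TTestType type = TTestType.resolve("Heteroscedastic"); // -> TTestType.HETEROSCEDASTIC
    String wire = type.value();                            // -> "heteroscedastic"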
@ -0,0 +1,99 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License;
 * you may not use this file except in compliance with the Elastic License.
 */

package org.elasticsearch.xpack.analytics.ttest;

import org.apache.lucene.index.LeafReaderContext;
import org.elasticsearch.common.lease.Releasables;
import org.elasticsearch.common.util.BigArrays;
import org.elasticsearch.index.fielddata.SortedNumericDoubleValues;
import org.elasticsearch.search.DocValueFormat;
import org.elasticsearch.search.aggregations.Aggregator;
import org.elasticsearch.search.aggregations.LeafBucketCollector;
import org.elasticsearch.search.aggregations.LeafBucketCollectorBase;
import org.elasticsearch.search.aggregations.metrics.CompensatedSum;
import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator;
import org.elasticsearch.search.aggregations.support.MultiValuesSource;
import org.elasticsearch.search.internal.SearchContext;

import java.io.IOException;
import java.util.List;
import java.util.Map;

import static org.elasticsearch.xpack.analytics.ttest.TTestAggregationBuilder.A_FIELD;
import static org.elasticsearch.xpack.analytics.ttest.TTestAggregationBuilder.B_FIELD;

public class UnpairedTTestAggregator extends TTestAggregator<UnpairedTTestState> {
    private final TTestStatsBuilder a;
    private final TTestStatsBuilder b;
    private final boolean homoscedastic;

    UnpairedTTestAggregator(String name, MultiValuesSource.NumericMultiValuesSource valuesSources, int tails, boolean homoscedastic,
                            DocValueFormat format, SearchContext context, Aggregator parent, List<PipelineAggregator> pipelineAggregators,
                            Map<String, Object> metadata) throws IOException {
        super(name, valuesSources, tails, format, context, parent, pipelineAggregators, metadata);
        BigArrays bigArrays = context.bigArrays();
        a = new TTestStatsBuilder(bigArrays);
        b = new TTestStatsBuilder(bigArrays);
        this.homoscedastic = homoscedastic;
    }

    @Override
    protected UnpairedTTestState getState(long bucket) {
        return new UnpairedTTestState(a.get(bucket), b.get(bucket), homoscedastic, tails);
    }

    @Override
    protected UnpairedTTestState getEmptyState() {
        return new UnpairedTTestState(new TTestStats(0, 0, 0), new TTestStats(0, 0, 0), homoscedastic, tails);
    }

    @Override
    protected long size() {
        return a.getSize();
    }

    @Override
    public LeafBucketCollector getLeafCollector(LeafReaderContext ctx,
                                                final LeafBucketCollector sub) throws IOException {
        if (valuesSources == null) {
            return LeafBucketCollector.NO_OP_COLLECTOR;
        }
        final BigArrays bigArrays = context.bigArrays();
        final SortedNumericDoubleValues docAValues = valuesSources.getField(A_FIELD.getPreferredName(), ctx);
        final SortedNumericDoubleValues docBValues = valuesSources.getField(B_FIELD.getPreferredName(), ctx);
        final CompensatedSum compSumA = new CompensatedSum(0, 0);
        final CompensatedSum compSumOfSqrA = new CompensatedSum(0, 0);
        final CompensatedSum compSumB = new CompensatedSum(0, 0);
        final CompensatedSum compSumOfSqrB = new CompensatedSum(0, 0);

        return new LeafBucketCollectorBase(sub, docAValues) {

            private void processValues(int doc, long bucket, SortedNumericDoubleValues docValues, CompensatedSum compSum,
                                       CompensatedSum compSumOfSqr, TTestStatsBuilder builder) throws IOException {
                if (docValues.advanceExact(doc)) {
                    final int numValues = docValues.docValueCount();
                    for (int i = 0; i < numValues; i++) {
                        builder.addValue(compSum, compSumOfSqr, bucket, docValues.nextValue());
                    }
                }
            }

            @Override
            public void collect(int doc, long bucket) throws IOException {
                a.grow(bigArrays, bucket + 1);
                b.grow(bigArrays, bucket + 1);
                processValues(doc, bucket, docAValues, compSumA, compSumOfSqrA, a);
                processValues(doc, bucket, docBValues, compSumB, compSumOfSqrB, b);
            }
        };
    }

    @Override
    public void doClose() {
        Releasables.close(a, b);
    }
}
@ -0,0 +1,120 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License;
 * you may not use this file except in compliance with the Elastic License.
 */

package org.elasticsearch.xpack.analytics.ttest;

import org.apache.commons.math3.distribution.TDistribution;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;

import java.io.IOException;
import java.util.Objects;
import java.util.stream.Stream;

public class UnpairedTTestState implements TTestState {

    public static final String NAME = "U";

    private final TTestStats a;
    private final TTestStats b;
    private boolean homoscedastic;
    private int tails;

    public UnpairedTTestState(TTestStats a, TTestStats b, boolean homoscedastic, int tails) {
        this.a = a;
        this.b = b;
        this.homoscedastic = homoscedastic;
        this.tails = tails;
    }

    public UnpairedTTestState(StreamInput in) throws IOException {
        a = new TTestStats(in);
        b = new TTestStats(in);
        homoscedastic = in.readBoolean();
        tails = in.readVInt();
    }

    @Override
    public double getValue() {
        if (a.count < 2 || b.count < 2) {
            return Double.NaN;
        }

        if (homoscedastic) {
            long n = a.count + b.count - 2;
            double variance = ((a.count - 1) * a.variance() + (b.count - 1) * b.variance()) / n;
            double nn = (1.0 / a.count + 1.0 / b.count);
            return p(variance * nn, n);
        } else {
            double s2an = a.variance() / a.count;
            double s2bn = b.variance() / b.count;
            double variance = s2an + s2bn;
            double degreeOfFreedom = variance * variance / (s2an * s2an / (a.count - 1) + s2bn * s2bn / (b.count - 1));
            return p(variance, degreeOfFreedom);
        }
    }

    private double p(double sd2, double degreesOfFreedom) {
        if (degreesOfFreedom < 0) {
            return Double.NaN;
        }
        double sd = Math.sqrt(sd2);
        double meanDiff = a.average() - b.average();
        double t = Math.abs(meanDiff / sd);
        TDistribution dist = new TDistribution(degreesOfFreedom);
        return dist.cumulativeProbability(-t) * tails;
    }

    @Override
    public TTestState reduce(Stream<TTestState> states) {
        TTestStats.Reducer reducerA = new TTestStats.Reducer();
        TTestStats.Reducer reducerB = new TTestStats.Reducer();
        states.forEach(tTestState -> {
            UnpairedTTestState state = (UnpairedTTestState) tTestState;
            if (state.homoscedastic != homoscedastic) {
                throw new IllegalStateException("Incompatible homoscedastic mode in the reduce. Expected "
                    + state.homoscedastic + " reduced with " + homoscedastic);
            }
            if (state.tails != tails) {
                throw new IllegalStateException("Incompatible tails value in the reduce. Expected "
                    + state.tails + " reduced with " + tails);
            }
            reducerA.accept(state.a);
            reducerB.accept(state.b);
        });
        return new UnpairedTTestState(reducerA.result(), reducerB.result(), homoscedastic, tails);
    }

    @Override
    public void writeTo(StreamOutput out) throws IOException {
        a.writeTo(out);
        b.writeTo(out);
        out.writeBoolean(homoscedastic);
        out.writeVInt(tails);
    }

    @Override
    public String getWriteableName() {
        return NAME;
    }

    @Override
    public boolean equals(Object o) {
        if (this == o) return true;
        if (o == null || getClass() != o.getClass()) return false;
        UnpairedTTestState that = (UnpairedTTestState) o;
        return homoscedastic == that.homoscedastic &&
            tails == that.tails &&
            a.equals(that.a) &&
            b.equals(that.b);
    }

    @Override
    public int hashCode() {
        return Objects.hash(a, b, homoscedastic, tails);
    }
}
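For reference, `getValue()` above implements the two unpaired variants on top of the per-side stats. With means \bar{x}_a, \bar{x}_b, sample variances s_a^2, s_b^2 and counts n_a, n_b (all taken from `TTestStats`), the homoscedastic branch is Student's pooled form:

    s_p^2 = \frac{(n_a - 1) s_a^2 + (n_b - 1) s_b^2}{n_a + n_b - 2}, \qquad
    t = \frac{\bar{x}_a - \bar{x}_b}{\sqrt{s_p^2 \left(1/n_a + 1/n_b\right)}}, \qquad
    \nu = n_a + n_b - 2

while the heteroscedastic branch is Welch's t-test with the Welch-Satterthwaite degrees of freedom:

    t = \frac{\bar{x}_a - \bar{x}_b}{\sqrt{s_a^2/n_a + s_b^2/n_b}}, \qquad
    \nu = \frac{\left(s_a^2/n_a + s_b^2/n_b\right)^2}{\dfrac{(s_a^2/n_a)^2}{n_a - 1} + \dfrac{(s_b^2/n_b)^2}{n_b - 1}}

In both branches `p(...)` then reports P(T_\nu \le -|t|) multiplied by `tails`, i.e. the one- or two-tailed p-value.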
@ -58,20 +58,21 @@ public class TransportAnalyticsStatsActionTests extends ESTestCase {
     }
 
     public void test() throws IOException {
-        AnalyticsUsage.Item item = randomFrom(AnalyticsUsage.Item.values());
-        AnalyticsUsage realUsage = new AnalyticsUsage();
-        AnalyticsUsage emptyUsage = new AnalyticsUsage();
-        ContextParser<Void, Void> parser = realUsage.track(item, (p, c) -> c);
-        ObjectPath unused = run(realUsage, emptyUsage);
-        assertThat(unused.evaluate("stats.0." + item.name().toLowerCase(Locale.ROOT) + "_usage"), equalTo(0));
-        assertThat(unused.evaluate("stats.1." + item.name().toLowerCase(Locale.ROOT) + "_usage"), equalTo(0));
-        int count = between(1, 10000);
-        for (int i = 0; i < count; i++) {
-            assertNull(parser.parse(null, null));
+        for (AnalyticsUsage.Item item : AnalyticsUsage.Item.values()) {
+            AnalyticsUsage realUsage = new AnalyticsUsage();
+            AnalyticsUsage emptyUsage = new AnalyticsUsage();
+            ContextParser<Void, Void> parser = realUsage.track(item, (p, c) -> c);
+            ObjectPath unused = run(realUsage, emptyUsage);
+            assertThat(unused.evaluate("stats.0." + item.name().toLowerCase(Locale.ROOT) + "_usage"), equalTo(0));
+            assertThat(unused.evaluate("stats.1." + item.name().toLowerCase(Locale.ROOT) + "_usage"), equalTo(0));
+            int count = between(1, 10000);
+            for (int i = 0; i < count; i++) {
+                assertNull(parser.parse(null, null));
+            }
+            ObjectPath used = run(realUsage, emptyUsage);
+            assertThat(item.name(), used.evaluate("stats.0." + item.name().toLowerCase(Locale.ROOT) + "_usage"), equalTo(count));
+            assertThat(item.name(), used.evaluate("stats.1." + item.name().toLowerCase(Locale.ROOT) + "_usage"), equalTo(0));
         }
-        ObjectPath used = run(realUsage, emptyUsage);
-        assertThat(used.evaluate("stats.0." + item.name().toLowerCase(Locale.ROOT) + "_usage"), equalTo(count));
-        assertThat(used.evaluate("stats.1." + item.name().toLowerCase(Locale.ROOT) + "_usage"), equalTo(0));
     }
 
     private ObjectPath run(AnalyticsUsage... nodeUsages) throws IOException {
@ -0,0 +1,131 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License;
 * you may not use this file except in compliance with the Elastic License.
 */

package org.elasticsearch.xpack.analytics.ttest;

import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.io.stream.BytesStreamOutput;
import org.elasticsearch.common.io.stream.NamedWriteableAwareStreamInput;
import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.Writeable;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.NamedXContentRegistry;
import org.elasticsearch.search.DocValueFormat;
import org.elasticsearch.search.SearchModule;
import org.elasticsearch.search.aggregations.Aggregation;
import org.elasticsearch.search.aggregations.ParsedAggregation;
import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator;
import org.elasticsearch.test.InternalAggregationTestCase;
import org.elasticsearch.xpack.analytics.AnalyticsPlugin;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import static java.util.Collections.emptyList;

public class InternalTTestTests extends InternalAggregationTestCase<InternalTTest> {

    private TTestType type = randomFrom(TTestType.values());
    private int tails = randomIntBetween(1, 2);

    @Override
    protected InternalTTest createTestInstance(String name, Map<String, Object> metadata) {
        TTestState state = randomState();
        DocValueFormat formatter = randomNumericDocValueFormat();
        return new InternalTTest(name, state, formatter, metadata);
    }

    private TTestState randomState() {
        if (type == TTestType.PAIRED) {
            return new PairedTTestState(randomStats(), tails);
        } else {
            return new UnpairedTTestState(randomStats(), randomStats(), type == TTestType.HOMOSCEDASTIC, tails);
        }
    }

    private TTestStats randomStats() {
        return new TTestStats(randomNonNegativeLong(), randomDouble(), randomDouble());
    }

    @Override
    protected Writeable.Reader<InternalTTest> instanceReader() {
        return InternalTTest::new;
    }

    @Override
    protected void assertReduced(InternalTTest reduced, List<InternalTTest> inputs) {
        TTestState expected = reduced.state.reduce(inputs.stream().map(a -> a.state));
        assertNotNull(expected);
        assertEquals(expected.getValue(), reduced.getValue(), 0.00001);
    }

    @Override
    protected void assertFromXContent(InternalTTest min, ParsedAggregation parsedAggregation) {
        // There is no ParsedTTest yet so we cannot test it here
    }

    @Override
    protected InternalTTest mutateInstance(InternalTTest instance) {
        String name = instance.getName();
        TTestState state;
        try (BytesStreamOutput output = new BytesStreamOutput()) {
            output.writeNamedWriteable(instance.state);
            try (StreamInput in = new NamedWriteableAwareStreamInput(output.bytes().streamInput(), getNamedWriteableRegistry())) {
                state = in.readNamedWriteable(TTestState.class);
            }
        } catch (IOException ex) {
            throw new IllegalStateException(ex);
        }
        DocValueFormat formatter = instance.format();
        List<PipelineAggregator> pipelineAggregators = instance.pipelineAggregators();
        Map<String, Object> metadata = instance.getMetadata();
        switch (between(0, 2)) {
            case 0:
                name += randomAlphaOfLength(5);
                break;
            case 1:
                state = randomState();
                break;
            case 2:
                if (metadata == null) {
                    metadata = new HashMap<>(1);
                } else {
                    metadata = new HashMap<>(instance.getMetadata());
                }
                metadata.put(randomAlphaOfLength(15), randomInt());
                break;
            default:
                throw new AssertionError("Illegal randomisation branch");
        }
        return new InternalTTest(name, state, formatter, metadata);
    }

    @Override
    protected List<NamedXContentRegistry.Entry> getNamedXContents() {
        List<NamedXContentRegistry.Entry> extendedNamedXContents = new ArrayList<>(super.getNamedXContents());
        extendedNamedXContents.add(new NamedXContentRegistry.Entry(Aggregation.class,
            new ParseField(TTestAggregationBuilder.NAME),
            (p, c) -> {
                assumeTrue("There is no ParsedTTest yet", false);
                return null;
            }
        ));
        return extendedNamedXContents;
    }

    @Override
    protected NamedWriteableRegistry getNamedWriteableRegistry() {
        List<NamedWriteableRegistry.Entry> entries = new ArrayList<>();
        entries.addAll(new SearchModule(Settings.EMPTY, false, emptyList()).getNamedWriteables());
        entries.addAll(new AnalyticsPlugin(Settings.EMPTY).getNamedWriteables());
        return new NamedWriteableRegistry(entries);
    }

}
@ -0,0 +1,84 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License;
 * you may not use this file except in compliance with the Elastic License.
 */

package org.elasticsearch.xpack.analytics.ttest;

import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.io.stream.Writeable;
import org.elasticsearch.common.xcontent.NamedXContentRegistry;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.script.Script;
import org.elasticsearch.search.aggregations.AggregatorFactories;
import org.elasticsearch.search.aggregations.BaseAggregationBuilder;
import org.elasticsearch.search.aggregations.support.MultiValuesSourceFieldConfig;
import org.elasticsearch.test.AbstractSerializingTestCase;
import org.junit.Before;

import java.io.IOException;

import static java.util.Collections.singletonList;
import static org.hamcrest.Matchers.hasSize;

public class TTestAggregationBuilderTests extends AbstractSerializingTestCase<TTestAggregationBuilder> {
    String aggregationName;

    @Before
    public void setupName() {
        aggregationName = randomAlphaOfLength(10);
    }

    @Override
    protected NamedXContentRegistry xContentRegistry() {
        return new NamedXContentRegistry(singletonList(new NamedXContentRegistry.Entry(
            BaseAggregationBuilder.class,
            new ParseField(TTestAggregationBuilder.NAME),
            (p, n) -> TTestAggregationBuilder.PARSER.apply(p, (String) n))));
    }

    @Override
    protected TTestAggregationBuilder doParseInstance(XContentParser parser) throws IOException {
        assertSame(XContentParser.Token.START_OBJECT, parser.nextToken());
        AggregatorFactories.Builder parsed = AggregatorFactories.parseAggregators(parser);
        assertThat(parsed.getAggregatorFactories(), hasSize(1));
        assertThat(parsed.getPipelineAggregatorFactories(), hasSize(0));
        TTestAggregationBuilder agg = (TTestAggregationBuilder) parsed.getAggregatorFactories().iterator().next();
        assertNull(parser.nextToken());
        assertNotNull(agg);
        return agg;
    }

    @Override
    protected TTestAggregationBuilder createTestInstance() {
        MultiValuesSourceFieldConfig aConfig;
        if (randomBoolean()) {
            aConfig = new MultiValuesSourceFieldConfig.Builder().setFieldName("a_field").build();
        } else {
            aConfig = new MultiValuesSourceFieldConfig.Builder().setScript(new Script(randomAlphaOfLength(10))).build();
        }
        MultiValuesSourceFieldConfig bConfig;
        if (randomBoolean()) {
            bConfig = new MultiValuesSourceFieldConfig.Builder().setFieldName("b_field").build();
        } else {
            bConfig = new MultiValuesSourceFieldConfig.Builder().setScript(new Script(randomAlphaOfLength(10))).build();
        }
        TTestAggregationBuilder aggregationBuilder = new TTestAggregationBuilder(aggregationName)
            .a(aConfig)
            .b(bConfig);
        if (randomBoolean()) {
            aggregationBuilder.tails(randomIntBetween(1, 2));
        }
        if (randomBoolean()) {
            aggregationBuilder.testType(randomFrom(TTestType.values()));
        }
        return aggregationBuilder;
    }

    @Override
    protected Writeable.Reader<TTestAggregationBuilder> instanceReader() {
        return TTestAggregationBuilder::new;
    }
}
@ -0,0 +1,550 @@
|
||||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License;
|
||||
* you may not use this file except in compliance with the Elastic License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.xpack.analytics.ttest;
|
||||
|
||||
import org.apache.lucene.document.NumericDocValuesField;
|
||||
import org.apache.lucene.document.SortedNumericDocValuesField;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.MatchAllDocsQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.elasticsearch.common.CheckedConsumer;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.index.fielddata.ScriptDocValues;
|
||||
import org.elasticsearch.index.mapper.KeywordFieldMapper;
|
||||
import org.elasticsearch.index.mapper.MappedFieldType;
|
||||
import org.elasticsearch.index.mapper.NumberFieldMapper;
|
||||
import org.elasticsearch.script.MockScriptEngine;
|
||||
import org.elasticsearch.script.Script;
|
||||
import org.elasticsearch.script.ScriptEngine;
|
||||
import org.elasticsearch.script.ScriptModule;
|
||||
import org.elasticsearch.script.ScriptService;
|
||||
import org.elasticsearch.script.ScriptType;
|
||||
import org.elasticsearch.search.aggregations.AggregationBuilder;
|
||||
import org.elasticsearch.search.aggregations.AggregationExecutionException;
|
||||
import org.elasticsearch.search.aggregations.AggregatorTestCase;
|
||||
import org.elasticsearch.search.aggregations.InternalAggregation;
|
||||
import org.elasticsearch.search.aggregations.bucket.global.GlobalAggregationBuilder;
|
||||
import org.elasticsearch.search.aggregations.bucket.global.InternalGlobal;
|
||||
import org.elasticsearch.search.aggregations.bucket.histogram.HistogramAggregationBuilder;
|
||||
import org.elasticsearch.search.aggregations.bucket.histogram.InternalHistogram;
|
||||
import org.elasticsearch.search.aggregations.support.AggregationInspectionHelper;
|
||||
import org.elasticsearch.search.aggregations.support.MultiValuesSourceFieldConfig;
|
||||
import org.elasticsearch.search.lookup.LeafDocLookup;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.function.Consumer;
|
||||
import java.util.function.Function;
|
||||
|
||||
import static java.util.Arrays.asList;
|
||||
import static java.util.Collections.singleton;
|
||||
|
||||
public class TTestAggregatorTests extends AggregatorTestCase {
|
||||
|
||||
/**
|
||||
* Script to return the {@code _value} provided by aggs framework.
|
||||
*/
|
||||
public static final String ADD_HALF_SCRIPT = "add_one";
|
||||
|
||||
@Override
|
||||
protected AggregationBuilder createAggBuilderForTypeTest(MappedFieldType fieldType, String fieldName) {
|
||||
return new TTestAggregationBuilder("foo")
|
||||
.a(new MultiValuesSourceFieldConfig.Builder().setFieldName(fieldName).build())
|
||||
.b(new MultiValuesSourceFieldConfig.Builder().setFieldName(fieldName).build());
|
||||
}
|
||||
|
||||
@Override
|
||||
protected ScriptService getMockScriptService() {
|
||||
Map<String, Function<Map<String, Object>, Object>> scripts = new HashMap<>();
|
||||
|
||||
scripts.put(ADD_HALF_SCRIPT, vars -> {
|
||||
LeafDocLookup leafDocLookup = (LeafDocLookup) vars.get("doc");
|
||||
String fieldname = (String) vars.get("fieldname");
|
||||
ScriptDocValues<?> scriptDocValues = leafDocLookup.get(fieldname);
|
||||
double val = ((Number) scriptDocValues.get(0)).doubleValue();
|
||||
if (val == 1) {
|
||||
val += 0.0000001;
|
||||
}
|
||||
return val + 0.5;
|
||||
});
|
||||
|
||||
MockScriptEngine scriptEngine = new MockScriptEngine(MockScriptEngine.NAME,
|
||||
scripts,
|
||||
Collections.emptyMap());
|
||||
Map<String, ScriptEngine> engines = Collections.singletonMap(scriptEngine.getType(), scriptEngine);
|
||||
|
||||
return new ScriptService(Settings.EMPTY, engines, ScriptModule.CORE_CONTEXTS);
|
||||
}
|
||||
|
||||
public void testNoMatchingField() throws IOException {
|
||||
testCase(new MatchAllDocsQuery(), randomFrom(TTestType.values()), iw -> {
|
||||
iw.addDocument(asList(new NumericDocValuesField("wrong_a", 102), new NumericDocValuesField("wrong_b", 89)));
|
||||
iw.addDocument(asList(new NumericDocValuesField("wrong_a", 99), new NumericDocValuesField("wrong_b", 93)));
|
||||
}, tTest -> assertEquals(Double.NaN, tTest.getValue(), 0));
|
||||
}
|
||||
|
||||
public void testNotEnoughRecords() throws IOException {
|
||||
testCase(new MatchAllDocsQuery(), randomFrom(TTestType.values()), iw -> {
|
||||
iw.addDocument(asList(new NumericDocValuesField("a", 102), new NumericDocValuesField("b", 89)));
|
||||
}, tTest -> assertEquals(Double.NaN, tTest.getValue(), 0));
|
||||
}
|
||||
|
||||
public void testSameValues() throws IOException {
|
||||
TTestType tTestType = randomFrom(TTestType.values());
|
||||
testCase(new MatchAllDocsQuery(), tTestType, iw -> {
|
||||
iw.addDocument(asList(new NumericDocValuesField("a", 102), new NumericDocValuesField("b", 102)));
|
||||
iw.addDocument(asList(new NumericDocValuesField("a", 99), new NumericDocValuesField("b", 99)));
|
||||
iw.addDocument(asList(new NumericDocValuesField("a", 111), new NumericDocValuesField("b", 111)));
|
||||
iw.addDocument(asList(new NumericDocValuesField("a", 97), new NumericDocValuesField("b", 97)));
|
||||
iw.addDocument(asList(new NumericDocValuesField("a", 101), new NumericDocValuesField("b", 101)));
|
||||
}, tTest -> assertEquals(tTestType == TTestType.PAIRED ? Double.NaN : 1, tTest.getValue(), 0));
|
||||
}
|
||||
|
||||
public void testMatchesSortedNumericDocValues() throws IOException {
|
||||
testCase(new MatchAllDocsQuery(), TTestType.PAIRED, iw -> {
|
||||
iw.addDocument(asList(new SortedNumericDocValuesField("a", 102), new SortedNumericDocValuesField("b", 89)));
|
||||
iw.addDocument(asList(new SortedNumericDocValuesField("a", 99), new SortedNumericDocValuesField("b", 93)));
|
||||
iw.addDocument(asList(new SortedNumericDocValuesField("a", 111), new SortedNumericDocValuesField("b", 72)));
|
||||
iw.addDocument(asList(new SortedNumericDocValuesField("a", 97), new SortedNumericDocValuesField("b", 98)));
|
||||
iw.addDocument(asList(new SortedNumericDocValuesField("a", 101), new SortedNumericDocValuesField("b", 102)));
|
||||
iw.addDocument(asList(new SortedNumericDocValuesField("a", 99), new SortedNumericDocValuesField("b", 98)));
|
||||
}, tTest -> assertEquals(0.09571844217 * 2, tTest.getValue(), 0.000001));
|
||||
}
|
||||
|
||||
public void testMultiplePairedValues() {
|
||||
AggregationExecutionException ex = expectThrows(AggregationExecutionException.class, () ->
|
||||
testCase(new MatchAllDocsQuery(), TTestType.PAIRED, iw -> {
|
||||
iw.addDocument(asList(new SortedNumericDocValuesField("a", 102), new SortedNumericDocValuesField("a", 103),
|
||||
new SortedNumericDocValuesField("b", 89)));
|
||||
iw.addDocument(asList(new SortedNumericDocValuesField("a", 99), new SortedNumericDocValuesField("b", 93)));
|
||||
}, tTest -> fail("Should have thrown exception"))
|
||||
);
|
||||
assertEquals(
|
||||
"Encountered more than one value for a single document. Use a script to combine multiple values per doc into a single value.",
|
||||
ex.getMessage());
|
||||
}
|
||||
|
||||
public void testMultipleUnpairedValues() throws IOException {
|
||||
TTestType tTestType = randomFrom(TTestType.HETEROSCEDASTIC, TTestType.HOMOSCEDASTIC);
|
||||
testCase(new MatchAllDocsQuery(), tTestType, iw -> {
|
||||
iw.addDocument(asList(new SortedNumericDocValuesField("a", 102), new SortedNumericDocValuesField("a", 103),
|
||||
new SortedNumericDocValuesField("b", 89)));
|
||||
iw.addDocument(asList(new SortedNumericDocValuesField("a", 99), new SortedNumericDocValuesField("b", 93)));
|
||||
}, tTest -> assertEquals(tTestType == TTestType.HETEROSCEDASTIC ? 0.0607303911 : 0.01718374671, tTest.getValue(), 0.000001));
|
||||
}
|
||||
|
||||
public void testMissingValues() throws IOException {
|
||||
TTestType tTestType = randomFrom(TTestType.values());
|
||||
testCase(new MatchAllDocsQuery(), tTestType, iw -> {
|
||||
iw.addDocument(asList(new SortedNumericDocValuesField("a", 102), new SortedNumericDocValuesField("b", 89)));
|
||||
iw.addDocument(asList(new SortedNumericDocValuesField("a1", 99), new SortedNumericDocValuesField("b", 93)));
|
||||
iw.addDocument(asList(new SortedNumericDocValuesField("a", 111), new SortedNumericDocValuesField("b1", 72)));
|
||||
iw.addDocument(asList(new SortedNumericDocValuesField("a", 97), new SortedNumericDocValuesField("b", 98)));
|
||||
iw.addDocument(asList(new SortedNumericDocValuesField("a", 101), new SortedNumericDocValuesField("b", 102)));
|
||||
iw.addDocument(asList(new SortedNumericDocValuesField("a", 99), new SortedNumericDocValuesField("b", 98)));
|
||||
}, tTest -> {
|
||||
switch (tTestType) {
|
||||
case PAIRED:
|
||||
assertEquals(0.4385093524, tTest.getValue(), 0.000001);
|
||||
break;
|
||||
case HOMOSCEDASTIC:
|
||||
assertEquals(0.1066843841, tTest.getValue(), 0.000001);
|
||||
break;
|
||||
case HETEROSCEDASTIC:
|
||||
assertEquals(0.1068382282, tTest.getValue(), 0.000001);
|
||||
break;
|
||||
default:
|
||||
fail("unknown t-test type " + tTestType);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
public void testUnmappedWithMissingField() throws IOException {
|
||||
TTestType tTestType = randomFrom(TTestType.values());
|
||||
boolean missA = randomBoolean();
|
||||
boolean missB = missA == false || randomBoolean(); // at least one of the fields should be missing
|
||||
MappedFieldType fieldType1 = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.INTEGER);
|
||||
fieldType1.setName(missA ? "not_a" : "a");
|
||||
MappedFieldType fieldType2 = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.INTEGER);
|
||||
fieldType2.setName(missB ? "not_b" : "b");
|
||||
TTestAggregationBuilder aggregationBuilder = new TTestAggregationBuilder("t_test")
|
||||
.a(new MultiValuesSourceFieldConfig.Builder().setFieldName("a").setMissing(100).build())
|
||||
.b(new MultiValuesSourceFieldConfig.Builder().setFieldName("b").setMissing(100).build())
|
||||
.testType(tTestType);
|
||||
testCase(aggregationBuilder, new MatchAllDocsQuery(), iw -> {
|
||||
iw.addDocument(asList(new NumericDocValuesField("a", 102), new NumericDocValuesField("b", 89)));
|
||||
iw.addDocument(asList(new NumericDocValuesField("a", 99), new NumericDocValuesField("b", 93)));
|
||||
}, (Consumer<InternalTTest>) tTest -> {
|
||||
if (missA && missB) {
|
||||
assertEquals(Double.NaN, tTest.getValue(), 0);
|
||||
} else {
|
||||
if (missA) {
|
||||
switch (tTestType) {
|
||||
case PAIRED:
|
||||
assertEquals(0.1392089745, tTest.getValue(), 0.000001);
|
||||
break;
|
||||
case HOMOSCEDASTIC:
|
||||
assertEquals(0.04600190799, tTest.getValue(), 0.000001);
|
||||
break;
|
||||
case HETEROSCEDASTIC:
|
||||
assertEquals(0.1392089745, tTest.getValue(), 0.000001);
|
||||
break;
|
||||
default:
|
||||
fail("unknown t-test type " + tTestType);
|
||||
}
|
||||
} else {
|
||||
switch (tTestType) {
|
||||
case PAIRED:
|
||||
assertEquals(0.7951672353, tTest.getValue(), 0.000001);
|
||||
break;
|
||||
case HOMOSCEDASTIC:
|
||||
assertEquals(0.7705842661, tTest.getValue(), 0.000001);
|
||||
break;
|
||||
case HETEROSCEDASTIC:
|
||||
assertEquals(0.7951672353, tTest.getValue(), 0.000001);
|
||||
break;
|
||||
default:
|
||||
fail("unknown t-test type " + tTestType);
|
||||
}
|
||||
}
|
||||
}
|
||||
}, fieldType1, fieldType2);
|
||||
}
|
||||
|
||||
public void testUnsupportedType() {
|
||||
TTestType tTestType = randomFrom(TTestType.values());
|
||||
boolean wrongA = randomBoolean();
|
||||
boolean wrongB = wrongA == false || randomBoolean(); // at least one of the fields should have unsupported type
|
||||
MappedFieldType fieldType1;
|
||||
if (wrongA) {
|
||||
fieldType1 = new KeywordFieldMapper.KeywordFieldType();
|
||||
fieldType1.setHasDocValues(true);
|
||||
} else {
|
||||
fieldType1 = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.INTEGER);
|
||||
}
|
||||
fieldType1.setName("a");
|
||||
MappedFieldType fieldType2;
|
||||
if (wrongB) {
|
||||
fieldType2 = new KeywordFieldMapper.KeywordFieldType();
|
||||
fieldType2.setHasDocValues(true);
|
||||
} else {
|
||||
fieldType2 = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.INTEGER);
|
||||
}
|
||||
fieldType2.setName("b");
|
||||
TTestAggregationBuilder aggregationBuilder = new TTestAggregationBuilder("t_test")
|
||||
.a(new MultiValuesSourceFieldConfig.Builder().setFieldName("a").build())
|
||||
.b(new MultiValuesSourceFieldConfig.Builder().setFieldName("b").build())
|
||||
.testType(tTestType);
|
||||
|
||||
IllegalArgumentException ex = expectThrows(IllegalArgumentException.class, () ->
|
||||
testCase(aggregationBuilder, new MatchAllDocsQuery(), iw -> {
|
||||
iw.addDocument(asList(new SortedNumericDocValuesField("a", 102), new SortedNumericDocValuesField("a", 103),
|
||||
new SortedNumericDocValuesField("b", 89)));
|
||||
iw.addDocument(asList(new SortedNumericDocValuesField("a", 99), new SortedNumericDocValuesField("b", 93)));
|
||||
}, tTest -> fail("Should have thrown exception"), fieldType1, fieldType2)
|
||||
);
|
||||
assertEquals(
|
||||
"Expected numeric type on field [" + (wrongA ? "a" : "b") + "], but got [keyword]",
|
||||
ex.getMessage());
|
||||
}
|
||||
|
||||
public void testBadMissingField() {
|
||||
TTestType tTestType = randomFrom(TTestType.values());
|
||||
boolean missA = randomBoolean();
|
||||
boolean missB = missA == false || randomBoolean(); // at least one of the fields should be have bad missing
|
||||
MappedFieldType fieldType1 = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.INTEGER);
|
||||
fieldType1.setName("a");
|
||||
MultiValuesSourceFieldConfig.Builder a = new MultiValuesSourceFieldConfig.Builder().setFieldName("a");
|
||||
if (missA) {
|
||||
a.setMissing("bad_number");
|
||||
}
|
||||
MappedFieldType fieldType2 = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.INTEGER);
|
||||
fieldType2.setName("b");
|
||||
MultiValuesSourceFieldConfig.Builder b = new MultiValuesSourceFieldConfig.Builder().setFieldName("b");
|
||||
if (missB) {
|
||||
b.setMissing("bad_number");
|
||||
}
|
||||
TTestAggregationBuilder aggregationBuilder = new TTestAggregationBuilder("t_test").a(a.build()).b(b.build()).testType(tTestType);
|
||||
|
||||
NumberFormatException ex = expectThrows(NumberFormatException.class, () ->
|
||||
testCase(aggregationBuilder, new MatchAllDocsQuery(), iw -> {
|
||||
iw.addDocument(asList(new SortedNumericDocValuesField("a", 102), new SortedNumericDocValuesField("b", 89)));
|
||||
iw.addDocument(asList(new SortedNumericDocValuesField("a", 99), new SortedNumericDocValuesField("b", 93)));
|
||||
}, tTest -> fail("Should have thrown exception"), fieldType1, fieldType2)
|
||||
);
|
||||
assertEquals("For input string: \"bad_number\"", ex.getMessage());
|
||||
}
|
||||
|
||||
|
||||
public void testUnmappedWithBadMissingField() {
|
||||
TTestType tTestType = randomFrom(TTestType.values());
|
||||
boolean missA = randomBoolean();
|
||||
boolean missB = missA == false || randomBoolean(); // at least one of the fields should be have bad missing
|
||||
MappedFieldType fieldType1 = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.INTEGER);
|
||||
fieldType1.setName("a");
|
||||
MultiValuesSourceFieldConfig.Builder a = new MultiValuesSourceFieldConfig.Builder();
|
||||
if (missA) {
|
||||
a.setFieldName("not_a").setMissing("bad_number");
|
||||
} else {
|
||||
a.setFieldName("a");
|
||||
}
|
||||
MappedFieldType fieldType2 = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.INTEGER);
|
||||
|
||||
MultiValuesSourceFieldConfig.Builder b = new MultiValuesSourceFieldConfig.Builder();
|
||||
if (missB) {
|
||||
b.setFieldName("not_b").setMissing("bad_number");
|
||||
} else {
|
||||
b.setFieldName("b");
|
||||
}
|
||||
TTestAggregationBuilder aggregationBuilder = new TTestAggregationBuilder("t_test").a(a.build()).b(b.build()).testType(tTestType);
|
||||
|
||||
NumberFormatException ex = expectThrows(NumberFormatException.class, () ->
|
||||
testCase(aggregationBuilder, new MatchAllDocsQuery(), iw -> {
|
||||
iw.addDocument(asList(new SortedNumericDocValuesField("a", 102), new SortedNumericDocValuesField("b", 89)));
|
||||
iw.addDocument(asList(new SortedNumericDocValuesField("a", 99), new SortedNumericDocValuesField("b", 93)));
|
||||
}, tTest -> fail("Should have thrown exception"), fieldType1, fieldType2)
|
||||
);
|
||||
assertEquals("For input string: \"bad_number\"", ex.getMessage());
|
||||
}
|
||||
|
||||
public void testEmptyBucket() throws IOException {
|
||||
TTestType tTestType = randomFrom(TTestType.values());
|
||||
MappedFieldType fieldType1 = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.INTEGER);
|
||||
fieldType1.setName("a");
|
||||
MappedFieldType fieldType2 = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.INTEGER);
|
||||
fieldType2.setName("b");
|
||||
MappedFieldType fieldTypePart = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.INTEGER);
|
||||
fieldTypePart.setName("part");
|
||||
HistogramAggregationBuilder histogram = new HistogramAggregationBuilder("histo").field("part").interval(10).minDocCount(0)
|
||||
.subAggregation(new TTestAggregationBuilder("t_test")
|
||||
.a(new MultiValuesSourceFieldConfig.Builder().setFieldName("a").build())
|
||||
.b(new MultiValuesSourceFieldConfig.Builder().setFieldName("b").build())
|
||||
.testType(tTestType));
|
||||
|
||||
testCase(histogram, new MatchAllDocsQuery(), iw -> {
|
||||
iw.addDocument(asList(new NumericDocValuesField("a", 102), new NumericDocValuesField("b", 89),
|
||||
new NumericDocValuesField("part", 1)));
|
||||
iw.addDocument(asList(new NumericDocValuesField("a", 99), new NumericDocValuesField("b", 93),
|
||||
new NumericDocValuesField("part", 1)));
|
||||
iw.addDocument(asList(new NumericDocValuesField("a", 111), new NumericDocValuesField("b", 72),
|
||||
new NumericDocValuesField("part", 1)));
|
||||
iw.addDocument(asList(new NumericDocValuesField("a", 97), new NumericDocValuesField("b", 98),
|
||||
new NumericDocValuesField("part", 21)));
|
||||
iw.addDocument(asList(new NumericDocValuesField("a", 101), new NumericDocValuesField("b", 102),
|
||||
new NumericDocValuesField("part", 21)));
|
||||
iw.addDocument(asList(new NumericDocValuesField("a", 99), new NumericDocValuesField("b", 98),
|
||||
new NumericDocValuesField("part", 21)));
|
||||
}, (Consumer<InternalHistogram>) histo -> {
|
||||
assertEquals(3, histo.getBuckets().size());
|
||||
assertNotNull(histo.getBuckets().get(0).getAggregations().asMap().get("t_test"));
|
||||
InternalTTest tTest = (InternalTTest) histo.getBuckets().get(0).getAggregations().asMap().get("t_test");
|
||||
assertEquals(tTestType == TTestType.PAIRED ? 0.1939778614 :
|
||||
tTestType == TTestType.HOMOSCEDASTIC ? 0.05878871029 : 0.07529006595, tTest.getValue(), 0.000001);
|
||||
|
||||
assertNotNull(histo.getBuckets().get(1).getAggregations().asMap().get("t_test"));
|
||||
tTest = (InternalTTest) histo.getBuckets().get(1).getAggregations().asMap().get("t_test");
|
||||
assertEquals(Double.NaN, tTest.getValue(), 0.000001);
|
||||
|
||||
assertNotNull(histo.getBuckets().get(2).getAggregations().asMap().get("t_test"));
|
||||
tTest = (InternalTTest) histo.getBuckets().get(2).getAggregations().asMap().get("t_test");
|
||||
assertEquals(tTestType == TTestType.PAIRED ? 0.6666666667 :
|
||||
tTestType == TTestType.HOMOSCEDASTIC ? 0.8593081179 : 0.8594865044, tTest.getValue(), 0.000001);
|
||||
|
||||
}, fieldType1, fieldType2, fieldTypePart);
|
||||
}
|
||||
|
||||
@AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/54365")
|
||||
public void testFormatter() throws IOException {
|
||||
TTestType tTestType = randomFrom(TTestType.values());
|
||||
MappedFieldType fieldType1 = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.INTEGER);
|
||||
fieldType1.setName("a");
|
||||
MappedFieldType fieldType2 = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.INTEGER);
|
||||
fieldType2.setName("b");
|
||||
TTestAggregationBuilder aggregationBuilder = new TTestAggregationBuilder("t_test")
|
||||
.a(new MultiValuesSourceFieldConfig.Builder().setFieldName("a").build())
|
||||
.b(new MultiValuesSourceFieldConfig.Builder().setFieldName("b").build())
|
||||
.testType(tTestType).format("0.00%");
|
||||
|
||||
testCase(aggregationBuilder, new MatchAllDocsQuery(), iw -> {
|
||||
iw.addDocument(asList(new NumericDocValuesField("a", 102), new NumericDocValuesField("b", 89)));
|
||||
iw.addDocument(asList(new NumericDocValuesField("a", 99), new NumericDocValuesField("b", 93)));
|
||||
iw.addDocument(asList(new NumericDocValuesField("a", 111), new NumericDocValuesField("b", 72)));
|
||||
}, (Consumer<InternalTTest>) tTest -> {
|
||||
assertEquals(tTestType == TTestType.PAIRED ? 0.1939778614 :
|
||||
tTestType == TTestType.HOMOSCEDASTIC ? 0.05878871029 : 0.07529006595, tTest.getValue(), 0.000001);
|
||||
assertEquals(tTestType == TTestType.PAIRED ? "19.40%" :
|
||||
tTestType == TTestType.HOMOSCEDASTIC ? "5.88%" : "7.53%", tTest.getValueAsString());
|
||||
}, fieldType1, fieldType2);
|
||||
}
|
||||
|
||||
public void testGetProperty() throws IOException {
|
||||
MappedFieldType fieldType1 = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.INTEGER);
|
||||
fieldType1.setName("a");
|
||||
MappedFieldType fieldType2 = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.INTEGER);
|
||||
fieldType2.setName("b");
|
||||
GlobalAggregationBuilder globalBuilder = new GlobalAggregationBuilder("global")
|
||||
.subAggregation(new TTestAggregationBuilder("t_test")
|
||||
.a(new MultiValuesSourceFieldConfig.Builder().setFieldName("a").build())
|
||||
.b(new MultiValuesSourceFieldConfig.Builder().setFieldName("b").build())
|
||||
.testType(TTestType.PAIRED));
|
||||
|
||||
testCase(globalBuilder, new MatchAllDocsQuery(), iw -> {
|
||||
iw.addDocument(asList(new NumericDocValuesField("a", 102), new NumericDocValuesField("b", 89)));
|
||||
iw.addDocument(asList(new NumericDocValuesField("a", 99), new NumericDocValuesField("b", 93)));
|
||||
iw.addDocument(asList(new NumericDocValuesField("a", 111), new NumericDocValuesField("b", 72)));
|
||||
}, (Consumer<InternalGlobal>) global -> {
|
||||
assertEquals(3, global.getDocCount());
|
||||
assertTrue(AggregationInspectionHelper.hasValue(global));
|
||||
assertNotNull(global.getAggregations().asMap().get("t_test"));
|
||||
InternalTTest tTest = (InternalTTest) global.getAggregations().asMap().get("t_test");
|
||||
assertEquals(tTest, global.getProperty("t_test"));
|
||||
assertEquals(0.1939778614, (Double) global.getProperty("t_test.value"), 0.000001);
|
||||
}, fieldType1, fieldType2);
|
||||
}
|
||||
|
||||
public void testScript() throws IOException {
|
||||
boolean fieldInA = randomBoolean();
|
||||
TTestType tTestType = randomFrom(TTestType.values());
|
||||
|
||||
MappedFieldType fieldType = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.INTEGER);
|
||||
fieldType.setName("field");
|
||||
|
||||
MultiValuesSourceFieldConfig a = new MultiValuesSourceFieldConfig.Builder().setFieldName("field").build();
|
||||
MultiValuesSourceFieldConfig b = new MultiValuesSourceFieldConfig.Builder().setScript(
|
||||
new Script(ScriptType.INLINE, MockScriptEngine.NAME, ADD_HALF_SCRIPT, Collections.singletonMap("fieldname", "field"))).build();
|
||||
TTestAggregationBuilder aggregationBuilder = new TTestAggregationBuilder("t_test").
|
||||
a(fieldInA ? a : b).b(fieldInA ? b : a).testType(tTestType);
|
||||
|
||||
testCase(aggregationBuilder, new MatchAllDocsQuery(), iw -> {
|
||||
iw.addDocument(singleton(new NumericDocValuesField("field", 1)));
|
||||
iw.addDocument(singleton(new NumericDocValuesField("field", 2)));
|
||||
iw.addDocument(singleton(new NumericDocValuesField("field", 3)));
|
||||
}, (Consumer<InternalTTest>) tTest -> {
|
||||
assertEquals(tTestType == TTestType.PAIRED ? 0 : 0.5733922538, tTest.getValue(), 0.000001);
|
||||
}, fieldType);
|
||||
}
|
||||
|
||||
public void testPaired() throws IOException {
    MappedFieldType fieldType1 = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.INTEGER);
    fieldType1.setName("a");
    MappedFieldType fieldType2 = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.INTEGER);
    fieldType2.setName("b");
    TTestAggregationBuilder aggregationBuilder = new TTestAggregationBuilder("t_test")
        .a(new MultiValuesSourceFieldConfig.Builder().setFieldName("a").build())
        .b(new MultiValuesSourceFieldConfig.Builder().setFieldName("b").build())
        .testType(TTestType.PAIRED);
    int tails = randomIntBetween(1, 2);
    if (tails == 1 || randomBoolean()) {
        aggregationBuilder.tails(tails);
    }
    testCase(aggregationBuilder, new MatchAllDocsQuery(), iw -> {
        iw.addDocument(asList(new NumericDocValuesField("a", 102), new NumericDocValuesField("b", 89)));
        iw.addDocument(asList(new NumericDocValuesField("a", 99), new NumericDocValuesField("b", 93)));
        iw.addDocument(asList(new NumericDocValuesField("a", 111), new NumericDocValuesField("b", 72)));
        iw.addDocument(asList(new NumericDocValuesField("a", 97), new NumericDocValuesField("b", 98)));
        iw.addDocument(asList(new NumericDocValuesField("a", 101), new NumericDocValuesField("b", 102)));
        iw.addDocument(asList(new NumericDocValuesField("a", 99), new NumericDocValuesField("b", 98)));
    }, (Consumer<InternalTTest>) ttest -> {
        assertEquals(0.09571844217 * tails, ttest.getValue(), 0.00001);
    }, fieldType1, fieldType2);
}

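The expected p-values asserted in testPaired and in the two tests below can be reproduced independently. A minimal sketch using org.apache.commons.math3; the class name TTestCheck and the use of commons-math here are illustrative assumptions, not part of this change:

[source,java]
--------------------------------------------------
import org.apache.commons.math3.stat.inference.TTest;

// Hypothetical standalone check of the expected p-values. commons-math
// returns two-tailed p-values, so the one-tailed expectations in the
// tests are these results divided by 2.
public class TTestCheck {
    public static void main(String[] args) {
        double[] a = {102, 99, 111, 97, 101, 99};
        double[] b = {89, 93, 72, 98, 102, 98};
        TTest t = new TTest();
        System.out.println(t.pairedTTest(a, b));        // ~0.19143688 = 0.09571844217 * 2
        System.out.println(t.homoscedasticTTest(a, b)); // ~0.07856577 = 0.03928288693 * 2
        System.out.println(t.tTest(a, b));              // ~0.09077332 = 0.04538666214 * 2 (Welch)
    }
}
--------------------------------------------------
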
public void testHomoscedastic() throws IOException {
    MappedFieldType fieldType1 = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.INTEGER);
    fieldType1.setName("a");
    MappedFieldType fieldType2 = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.INTEGER);
    fieldType2.setName("b");
    TTestAggregationBuilder aggregationBuilder = new TTestAggregationBuilder("t_test")
        .a(new MultiValuesSourceFieldConfig.Builder().setFieldName("a").build())
        .b(new MultiValuesSourceFieldConfig.Builder().setFieldName("b").build())
        .testType(TTestType.HOMOSCEDASTIC);
    int tails = randomIntBetween(1, 2);
    if (tails == 1 || randomBoolean()) {
        aggregationBuilder.tails(tails);
    }
    testCase(aggregationBuilder, new MatchAllDocsQuery(), iw -> {
        iw.addDocument(asList(new NumericDocValuesField("a", 102), new NumericDocValuesField("b", 89)));
        iw.addDocument(asList(new NumericDocValuesField("a", 99), new NumericDocValuesField("b", 93)));
        iw.addDocument(asList(new NumericDocValuesField("a", 111), new NumericDocValuesField("b", 72)));
        iw.addDocument(asList(new NumericDocValuesField("a", 97), new NumericDocValuesField("b", 98)));
        iw.addDocument(asList(new NumericDocValuesField("a", 101), new NumericDocValuesField("b", 102)));
        iw.addDocument(asList(new NumericDocValuesField("a", 99), new NumericDocValuesField("b", 98)));
    }, (Consumer<InternalTTest>) ttest -> {
        assertEquals(0.03928288693 * tails, ttest.getValue(), 0.00001);
    }, fieldType1, fieldType2);
}

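For reference, the equal-variance statistic behind testHomoscedastic is the standard pooled form. With the fixture data above, the sample means are 101.5 and 92, the pooled variance is 70.55, and t is about 1.96 on 10 degrees of freedom, giving the two-tailed p of about 0.0786:

[source,latex]
--------------------------------------------------
t = \frac{\bar{a} - \bar{b}}{s_p\sqrt{\tfrac{1}{n_a} + \tfrac{1}{n_b}}},
\qquad
s_p^2 = \frac{(n_a - 1)s_a^2 + (n_b - 1)s_b^2}{n_a + n_b - 2}
--------------------------------------------------
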
public void testHeteroscedastic() throws IOException {
    MappedFieldType fieldType1 = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.INTEGER);
    fieldType1.setName("a");
    MappedFieldType fieldType2 = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.INTEGER);
    fieldType2.setName("b");
    TTestAggregationBuilder aggregationBuilder = new TTestAggregationBuilder("t_test")
        .a(new MultiValuesSourceFieldConfig.Builder().setFieldName("a").build())
        .b(new MultiValuesSourceFieldConfig.Builder().setFieldName("b").build());
    // HETEROSCEDASTIC is the default test type, so it is only set explicitly half the time
    if (randomBoolean()) {
        aggregationBuilder.testType(TTestType.HETEROSCEDASTIC);
    }
    int tails = randomIntBetween(1, 2);
    if (tails == 1 || randomBoolean()) {
        aggregationBuilder.tails(tails);
    }
    testCase(aggregationBuilder, new MatchAllDocsQuery(), iw -> {
        iw.addDocument(asList(new NumericDocValuesField("a", 102), new NumericDocValuesField("b", 89)));
        iw.addDocument(asList(new NumericDocValuesField("a", 99), new NumericDocValuesField("b", 93)));
        iw.addDocument(asList(new NumericDocValuesField("a", 111), new NumericDocValuesField("b", 72)));
        iw.addDocument(asList(new NumericDocValuesField("a", 97), new NumericDocValuesField("b", 98)));
        iw.addDocument(asList(new NumericDocValuesField("a", 101), new NumericDocValuesField("b", 102)));
        iw.addDocument(asList(new NumericDocValuesField("a", 99), new NumericDocValuesField("b", 98)));
    }, (Consumer<InternalTTest>) ttest -> {
        assertEquals(0.04538666214 * tails, ttest.getValue(), 0.00001);
    }, fieldType1, fieldType2);
}

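Welch's (heteroscedastic) variant drops the equal-variance assumption, taking its degrees of freedom from the Welch-Satterthwaite approximation. With the fixture data the approximation gives about 7.03 degrees of freedom, which is why its two-tailed p (about 0.0908) is slightly larger than the pooled-variance one:

[source,latex]
--------------------------------------------------
t = \frac{\bar{a} - \bar{b}}{\sqrt{\tfrac{s_a^2}{n_a} + \tfrac{s_b^2}{n_b}}},
\qquad
\nu \approx \frac{\left(\tfrac{s_a^2}{n_a} + \tfrac{s_b^2}{n_b}\right)^2}
{\tfrac{(s_a^2/n_a)^2}{n_a - 1} + \tfrac{(s_b^2/n_b)^2}{n_b - 1}}
--------------------------------------------------
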
private void testCase(Query query, TTestType type,
                      CheckedConsumer<RandomIndexWriter, IOException> buildIndex,
                      Consumer<InternalTTest> verify) throws IOException {
    MappedFieldType fieldType1 = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.INTEGER);
    fieldType1.setName("a");
    MappedFieldType fieldType2 = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.INTEGER);
    fieldType2.setName("b");

    TTestAggregationBuilder aggregationBuilder = new TTestAggregationBuilder("t_test")
        .a(new MultiValuesSourceFieldConfig.Builder().setFieldName("a").build())
        .b(new MultiValuesSourceFieldConfig.Builder().setFieldName("b").build());
    // HETEROSCEDASTIC is the default, so it can be omitted to exercise the default path
    if (type != TTestType.HETEROSCEDASTIC || randomBoolean()) {
        aggregationBuilder.testType(type);
    }
    testCase(aggregationBuilder, query, buildIndex, verify, fieldType1, fieldType2);
}

private <T extends AggregationBuilder, V extends InternalAggregation> void testCase(
        T aggregationBuilder, Query query,
        CheckedConsumer<RandomIndexWriter, IOException> buildIndex,
        Consumer<V> verify, MappedFieldType... fieldType) throws IOException {
    try (Directory directory = newDirectory()) {
        RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);
        buildIndex.accept(indexWriter);
        indexWriter.close();

        try (IndexReader indexReader = DirectoryReader.open(directory)) {
            IndexSearcher indexSearcher = newSearcher(indexReader, true, true);

            V agg = searchAndReduce(indexSearcher, query, aggregationBuilder, fieldType);
            verify.accept(agg);
        }
    }
}
}
@ -114,19 +114,22 @@ public class AnalyticsStatsAction extends ActionType<AnalyticsStatsAction.Respon
        static final ParseField CUMULATIVE_CARDINALITY_USAGE = new ParseField("cumulative_cardinality_usage");
        static final ParseField STRING_STATS_USAGE = new ParseField("string_stats_usage");
        static final ParseField TOP_METRICS_USAGE = new ParseField("top_metrics_usage");
        static final ParseField T_TEST_USAGE = new ParseField("t_test_usage");

        private final long boxplotUsage;
        private final long cumulativeCardinalityUsage;
        private final long stringStatsUsage;
        private final long topMetricsUsage;
        private final long ttestUsage;

        public NodeResponse(DiscoveryNode node, long boxplotUsage, long cumulativeCardinalityUsage, long stringStatsUsage,
                            long topMetricsUsage) {
                            long topMetricsUsage, long ttestUsage) {
            super(node);
            this.boxplotUsage = boxplotUsage;
            this.cumulativeCardinalityUsage = cumulativeCardinalityUsage;
            this.stringStatsUsage = stringStatsUsage;
            this.topMetricsUsage = topMetricsUsage;
            this.ttestUsage = ttestUsage;
        }

        public NodeResponse(StreamInput in) throws IOException {
@ -144,6 +147,11 @@ public class AnalyticsStatsAction extends ActionType<AnalyticsStatsAction.Respon
                topMetricsUsage = 0;
                stringStatsUsage = 0;
            }
            if (in.getVersion().onOrAfter(Version.V_7_8_0)) {
                ttestUsage = in.readVLong();
            } else {
                ttestUsage = 0;
            }
        }

        @Override
@ -157,6 +165,9 @@ public class AnalyticsStatsAction extends ActionType<AnalyticsStatsAction.Respon
                out.writeVLong(stringStatsUsage);
                out.writeVLong(topMetricsUsage);
            }
            if (out.getVersion().onOrAfter(Version.V_7_8_0)) {
                out.writeVLong(ttestUsage);
            }
        }

        @Override
@ -166,6 +177,7 @@ public class AnalyticsStatsAction extends ActionType<AnalyticsStatsAction.Respon
            builder.field(CUMULATIVE_CARDINALITY_USAGE.getPreferredName(), cumulativeCardinalityUsage);
            builder.field(STRING_STATS_USAGE.getPreferredName(), stringStatsUsage);
            builder.field(TOP_METRICS_USAGE.getPreferredName(), topMetricsUsage);
            builder.field(T_TEST_USAGE.getPreferredName(), ttestUsage);
            builder.endObject();
            return builder;
        }
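For illustration, the per-node stats rendered by toXContent above would look roughly like this; the counter values, and the "boxplot_usage" key for the field whose ParseField is not shown in this hunk, are made up:

[source,js]
--------------------------------------------------
{
  "boxplot_usage": 0,
  "cumulative_cardinality_usage": 2,
  "string_stats_usage": 1,
  "top_metrics_usage": 3,
  "t_test_usage": 5
}
--------------------------------------------------
// NOTCONSOLE
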
@ -185,5 +197,9 @@ public class AnalyticsStatsAction extends ActionType<AnalyticsStatsAction.Respon
        public long getTopMetricsUsage() {
            return topMetricsUsage;
        }

        public long getTTestUsage() {
            return ttestUsage;
        }
    }
}
@ -78,7 +78,8 @@ public final class Aggregations {
        "string_stats", // https://github.com/elastic/elasticsearch/issues/51925
        "terms", // https://github.com/elastic/elasticsearch/issues/51073
        "top_hits",
        "top_metrics" // https://github.com/elastic/elasticsearch/issues/52236
        "top_metrics", // https://github.com/elastic/elasticsearch/issues/52236
        "t_test" // https://github.com/elastic/elasticsearch/issues/54503
    );

    private Aggregations() {}