migrate master branch for xpack
|
@ -0,0 +1,8 @@
|
|||
# This file is used with all of the non-matrix tests in Jenkins.
|
||||
|
||||
# This .properties file defines the versions of Java with which to
|
||||
# build and test Elasticsearch for this branch. Valid Java versions
|
||||
# are 'java' or 'openjdk' followed by the major release number.
|
||||
|
||||
ES_BUILD_JAVA=java10
|
||||
ES_RUNTIME_JAVA=java8
|
|
@ -0,0 +1,2 @@
|
|||
ES_BUILD_JAVA:
|
||||
- java10
|
|
@ -0,0 +1 @@
|
|||
exclude:
|
|
@ -0,0 +1,3 @@
|
|||
ES_RUNTIME_JAVA:
|
||||
- java8
|
||||
- java10
|
|
@ -0,0 +1,88 @@
|
|||
((java-mode
|
||||
.
|
||||
((eval
|
||||
.
|
||||
(progn
|
||||
(defun my/point-in-defun-declaration-p ()
|
||||
(let ((bod (save-excursion (c-beginning-of-defun)
|
||||
(point))))
|
||||
(<= bod
|
||||
(point)
|
||||
(save-excursion (goto-char bod)
|
||||
(re-search-forward "{")
|
||||
(point)))))
|
||||
|
||||
(defun my/is-string-concatenation-p ()
|
||||
"Returns true if the previous line is a string concatenation"
|
||||
(save-excursion
|
||||
(let ((start (point)))
|
||||
(forward-line -1)
|
||||
(if (re-search-forward " \\\+$" start t) t nil))))
|
||||
|
||||
(defun my/inside-java-lambda-p ()
|
||||
"Returns true if point is the first statement inside of a lambda"
|
||||
(save-excursion
|
||||
(c-beginning-of-statement-1)
|
||||
(let ((start (point)))
|
||||
(forward-line -1)
|
||||
(if (search-forward " -> {" start t) t nil))))
|
||||
|
||||
(defun my/trailing-paren-p ()
|
||||
"Returns true if point is a training paren and semicolon"
|
||||
(save-excursion
|
||||
(end-of-line)
|
||||
(let ((endpoint (point)))
|
||||
(beginning-of-line)
|
||||
(if (re-search-forward "[ ]*);$" endpoint t) t nil))))
|
||||
|
||||
(defun my/prev-line-call-with-no-args-p ()
|
||||
"Return true if the previous line is a function call with no arguments"
|
||||
(save-excursion
|
||||
(let ((start (point)))
|
||||
(forward-line -1)
|
||||
(if (re-search-forward ".($" start t) t nil))))
|
||||
|
||||
(defun my/arglist-cont-nonempty-indentation (arg)
|
||||
(if (my/inside-java-lambda-p)
|
||||
'+
|
||||
(if (my/is-string-concatenation-p)
|
||||
16
|
||||
(unless (my/point-in-defun-declaration-p) '++))))
|
||||
|
||||
(defun my/statement-block-intro (arg)
|
||||
(if (and (c-at-statement-start-p) (my/inside-java-lambda-p)) 0 '+))
|
||||
|
||||
(defun my/block-close (arg)
|
||||
(if (my/inside-java-lambda-p) '- 0))
|
||||
|
||||
(defun my/arglist-close (arg) (if (my/trailing-paren-p) 0 '--))
|
||||
|
||||
(defun my/arglist-intro (arg)
|
||||
(if (my/prev-line-call-with-no-args-p) '++ 0))
|
||||
|
||||
(c-set-offset 'inline-open 0)
|
||||
(c-set-offset 'topmost-intro-cont '+)
|
||||
(c-set-offset 'statement-block-intro 'my/statement-block-intro)
|
||||
(c-set-offset 'block-close 'my/block-close)
|
||||
(c-set-offset 'knr-argdecl-intro '+)
|
||||
(c-set-offset 'substatement-open '+)
|
||||
(c-set-offset 'substatement-label '+)
|
||||
(c-set-offset 'case-label '+)
|
||||
(c-set-offset 'label '+)
|
||||
(c-set-offset 'statement-case-open '+)
|
||||
(c-set-offset 'statement-cont '++)
|
||||
(c-set-offset 'arglist-intro 'my/arglist-intro)
|
||||
(c-set-offset 'arglist-cont-nonempty '(my/arglist-cont-nonempty-indentation c-lineup-arglist))
|
||||
(c-set-offset 'arglist-close 'my/arglist-close)
|
||||
(c-set-offset 'inexpr-class 0)
|
||||
(c-set-offset 'access-label 0)
|
||||
(c-set-offset 'inher-intro '++)
|
||||
(c-set-offset 'inher-cont '++)
|
||||
(c-set-offset 'brace-list-intro '+)
|
||||
(c-set-offset 'func-decl-cont '++)
|
||||
))
|
||||
(c-basic-offset . 4)
|
||||
(c-comment-only-line-offset . (0 . 0))
|
||||
(fill-column . 140)
|
||||
(fci-rule-column . 140)
|
||||
(compile-command . "gradle compileTestJava"))))
|
|
@ -0,0 +1,6 @@
|
|||
<!--
|
||||
Please do not submit any issues related to security vulnerabilities that
|
||||
could be exploited by an attacker. Instead, send an email to
|
||||
security@elastic.co. If you have any doubts, send an email to
|
||||
security@elastic.co.
|
||||
-->
|
|
@ -0,0 +1,54 @@
|
|||
.idea/
|
||||
.gradle/
|
||||
*.iml
|
||||
*.ipr
|
||||
*.iws
|
||||
work/
|
||||
/data/
|
||||
logs/
|
||||
.DS_Store
|
||||
build/
|
||||
build-idea/
|
||||
build-eclipse/
|
||||
generated-resources/
|
||||
target/
|
||||
*-execution-hints.log
|
||||
docs/html/
|
||||
docs/build.log
|
||||
npm-debug.log
|
||||
/tmp/
|
||||
backwards/
|
||||
html_docs
|
||||
.vagrant/
|
||||
vendor/
|
||||
.bundle
|
||||
Gemfile.lock
|
||||
|
||||
## eclipse ignores (use 'mvn eclipse:eclipse' to build eclipse projects)
|
||||
## All files (.project, .classpath, .settings/*) should be generated through Maven which
|
||||
## will correctly set the classpath based on the declared dependencies and write settings
|
||||
## files to ensure common coding style across Eclipse and IDEA.
|
||||
.project
|
||||
.classpath
|
||||
eclipse-build
|
||||
*/.project
|
||||
*/.classpath
|
||||
*/eclipse-build
|
||||
.settings
|
||||
!/.settings/org.eclipse.core.resources.prefs
|
||||
!/.settings/org.eclipse.jdt.core.prefs
|
||||
!/.settings/org.eclipse.jdt.ui.prefs
|
||||
|
||||
## netbeans ignores
|
||||
nb-configuration.xml
|
||||
nbactions.xml
|
||||
|
||||
dependency-reduced-pom.xml
|
||||
github.token
|
||||
|
||||
## ignore attachment files
|
||||
.local-*
|
||||
*/.local-*
|
||||
|
||||
## ignore antlr temporary files used by vscode-antlr4
|
||||
.antlr
|
|
@ -0,0 +1,8 @@
|
|||
-/target
|
||||
-/license/target
|
||||
-/marvel/target
|
||||
-/qa/target
|
||||
-/shield/target
|
||||
-/watcher/target
|
||||
-/x-dev-tools/target
|
||||
-*.class
|
|
@ -0,0 +1,223 @@
|
|||
ELASTIC LICENSE AGREEMENT
|
||||
|
||||
PLEASE READ CAREFULLY THIS ELASTIC LICENSE AGREEMENT (THIS "AGREEMENT"), WHICH
|
||||
CONSTITUTES A LEGALLY BINDING AGREEMENT AND GOVERNS ALL OF YOUR USE OF ALL OF
|
||||
THE ELASTIC SOFTWARE WITH WHICH THIS AGREEMENT IS INCLUDED ("ELASTIC SOFTWARE")
|
||||
THAT IS PROVIDED IN OBJECT CODE FORMAT, AND, IN ACCORDANCE WITH SECTION 2 BELOW,
|
||||
CERTAIN OF THE ELASTIC SOFTWARE THAT IS PROVIDED IN SOURCE CODE FORMAT. BY
|
||||
INSTALLING OR USING ANY OF THE ELASTIC SOFTWARE GOVERNED BY THIS AGREEMENT, YOU
|
||||
ARE ASSENTING TO THE TERMS AND CONDITIONS OF THIS AGREEMENT. IF YOU DO NOT AGREE
|
||||
WITH SUCH TERMS AND CONDITIONS, YOU MAY NOT INSTALL OR USE THE ELASTIC SOFTWARE
|
||||
GOVERNED BY THIS AGREEMENT. IF YOU ARE INSTALLING OR USING THE SOFTWARE ON
|
||||
BEHALF OF A LEGAL ENTITY, YOU REPRESENT AND WARRANT THAT YOU HAVE THE ACTUAL
|
||||
AUTHORITY TO AGREE TO THE TERMS AND CONDITIONS OF THIS AGREEMENT ON BEHALF OF
|
||||
SUCH ENTITY.
|
||||
|
||||
Posted Date: April 20, 2018
|
||||
|
||||
This Agreement is entered into by and between Elasticsearch BV ("Elastic") and
|
||||
You, or the legal entity on behalf of whom You are acting (as applicable,
|
||||
"You").
|
||||
|
||||
1. OBJECT CODE END USER LICENSES, RESTRICTIONS AND THIRD PARTY OPEN SOURCE
|
||||
SOFTWARE
|
||||
|
||||
1.1 Object Code End User License. Subject to the terms and conditions of
|
||||
Section 1.2 of this Agreement, Elastic hereby grants to You, AT NO CHARGE and
|
||||
for so long as you are not in breach of any provision of this Agreement, a
|
||||
License to the Basic Features and Functions of the Elastic Software.
|
||||
|
||||
1.2 Reservation of Rights; Restrictions. As between Elastic and You, Elastic
|
||||
and its licensors own all right, title and interest in and to the Elastic
|
||||
Software, and except as expressly set forth in Sections 1.1, and 2.1 of this
|
||||
Agreement, no other license to the Elastic Software is granted to You under
|
||||
this Agreement, by implication, estoppel or otherwise. You agree not to: (i)
|
||||
reverse engineer or decompile, decrypt, disassemble or otherwise reduce any
|
||||
Elastic Software provided to You in Object Code, or any portion thereof, to
|
||||
Source Code, except and only to the extent any such restriction is prohibited
|
||||
by applicable law, (ii) except as expressly permitted in this Agreement,
|
||||
prepare derivative works from, modify, copy or use the Elastic Software Object
|
||||
Code or the Commercial Software Source Code in any manner; (iii) except as
|
||||
expressly permitted in Section 1.1 above, transfer, sell, rent, lease,
|
||||
distribute, sublicense, loan or otherwise transfer, Elastic Software Object
|
||||
Code, in whole or in part, to any third party; (iv) use Elastic Software
|
||||
Object Code for providing time-sharing services, any software-as-a-service,
|
||||
service bureau services or as part of an application services provider or
|
||||
other service offering (collectively, "SaaS Offering") where obtaining access
|
||||
to the Elastic Software or the features and functions of the Elastic Software
|
||||
is a primary reason or substantial motivation for users of the SaaS Offering
|
||||
to access and/or use the SaaS Offering ("Prohibited SaaS Offering"); (v)
|
||||
circumvent the limitations on use of Elastic Software provided to You in
|
||||
Object Code format that are imposed or preserved by any License Key, or (vi)
|
||||
alter or remove any Marks and Notices in the Elastic Software. If You have any
|
||||
question as to whether a specific SaaS Offering constitutes a Prohibited SaaS
|
||||
Offering, or are interested in obtaining Elastic's permission to engage in
|
||||
commercial or non-commercial distribution of the Elastic Software, please
|
||||
contact elastic_license@elastic.co.
|
||||
|
||||
1.3 Third Party Open Source Software. The Commercial Software may contain or
|
||||
be provided with third party open source libraries, components, utilities and
|
||||
other open source software (collectively, "Open Source Software"), which Open
|
||||
Source Software may have applicable license terms as identified on a website
|
||||
designated by Elastic. Notwithstanding anything to the contrary herein, use of
|
||||
the Open Source Software shall be subject to the license terms and conditions
|
||||
applicable to such Open Source Software, to the extent required by the
|
||||
applicable licensor (which terms shall not restrict the license rights granted
|
||||
to You hereunder, but may contain additional rights). To the extent any
|
||||
condition of this Agreement conflicts with any license to the Open Source
|
||||
Software, the Open Source Software license will govern with respect to such
|
||||
Open Source Software only. Elastic may also separately provide you with
|
||||
certain open source software that is licensed by Elastic. Your use of such
|
||||
Elastic open source software will not be governed by this Agreement, but by
|
||||
the applicable open source license terms.
|
||||
|
||||
2. COMMERCIAL SOFTWARE SOURCE CODE
|
||||
|
||||
2.1 Limited License. Subject to the terms and conditions of Section 2.2 of
|
||||
this Agreement, Elastic hereby grants to You, AT NO CHARGE and for so long as
|
||||
you are not in breach of any provision of this Agreement, a limited,
|
||||
non-exclusive, non-transferable, fully paid up royalty free right and license
|
||||
to the Commercial Software in Source Code format, without the right to grant
|
||||
or authorize sublicenses, to prepare Derivative Works of the Commercial
|
||||
Software, provided You (i) do not hack the licensing mechanism, or otherwise
|
||||
circumvent the intended limitations on the use of Elastic Software to enable
|
||||
features other than Basic Features and Functions or those features You are
|
||||
entitled to as part of a Subscription, and (ii) use the resulting object code
|
||||
only for reasonable testing purposes.
|
||||
|
||||
2.2 Restrictions. Nothing in Section 2.1 grants You the right to (i) use the
|
||||
Commercial Software Source Code other than in accordance with Section 2.1
|
||||
above, (ii) use a Derivative Work of the Commercial Software outside of a
|
||||
Non-production Environment, in any production capacity, on a temporary or
|
||||
permanent basis, or (iii) transfer, sell, rent, lease, distribute, sublicense,
|
||||
loan or otherwise make available the Commercial Software Source Code, in whole
|
||||
or in part, to any third party. Notwithstanding the foregoing, You may
|
||||
maintain a copy of the repository in which the Source Code of the Commercial
|
||||
Software resides and that copy may be publicly accessible, provided that you
|
||||
include this Agreement with Your copy of the repository.
|
||||
|
||||
3. TERMINATION
|
||||
|
||||
3.1 Termination. This Agreement will automatically terminate, whether or not
|
||||
You receive notice of such Termination from Elastic, if You breach any of its
|
||||
provisions.
|
||||
|
||||
3.2 Post Termination. Upon any termination of this Agreement, for any reason,
|
||||
You shall promptly cease the use of the Elastic Software in Object Code format
|
||||
and cease use of the Commercial Software in Source Code format. For the
|
||||
avoidance of doubt, termination of this Agreement will not affect Your right
|
||||
to use Elastic Software, in either Object Code or Source Code formats, made
|
||||
available under the Apache License Version 2.0.
|
||||
|
||||
3.3 Survival. Sections 1.2, 2.2. 3.3, 4 and 5 shall survive any termination or
|
||||
expiration of this Agreement.
|
||||
|
||||
4. DISCLAIMER OF WARRANTIES AND LIMITATION OF LIABILITY
|
||||
|
||||
4.1 Disclaimer of Warranties. TO THE MAXIMUM EXTENT PERMITTED UNDER APPLICABLE
|
||||
LAW, THE ELASTIC SOFTWARE IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND,
|
||||
AND ELASTIC AND ITS LICENSORS MAKE NO WARRANTIES WHETHER EXPRESSED, IMPLIED OR
|
||||
STATUTORY REGARDING OR RELATING TO THE ELASTIC SOFTWARE. TO THE MAXIMUM EXTENT
|
||||
PERMITTED UNDER APPLICABLE LAW, ELASTIC AND ITS LICENSORS SPECIFICALLY
|
||||
DISCLAIM ALL IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
|
||||
PURPOSE AND NON-INFRINGEMENT WITH RESPECT TO THE ELASTIC SOFTWARE, AND WITH
|
||||
RESPECT TO THE USE OF THE FOREGOING. FURTHER, ELASTIC DOES NOT WARRANT RESULTS
|
||||
OF USE OR THAT THE ELASTIC SOFTWARE WILL BE ERROR FREE OR THAT THE USE OF THE
|
||||
ELASTIC SOFTWARE WILL BE UNINTERRUPTED.
|
||||
|
||||
4.2 Limitation of Liability. IN NO EVENT SHALL ELASTIC OR ITS LICENSORS BE
|
||||
LIABLE TO YOU OR ANY THIRD PARTY FOR ANY DIRECT OR INDIRECT DAMAGES,
|
||||
INCLUDING, WITHOUT LIMITATION, FOR ANY LOSS OF PROFITS, LOSS OF USE, BUSINESS
|
||||
INTERRUPTION, LOSS OF DATA, COST OF SUBSTITUTE GOODS OR SERVICES, OR FOR ANY
|
||||
SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, IN CONNECTION WITH
|
||||
OR ARISING OUT OF THE USE OR INABILITY TO USE THE ELASTIC SOFTWARE, OR THE
|
||||
PERFORMANCE OF OR FAILURE TO PERFORM THIS AGREEMENT, WHETHER ALLEGED AS A
|
||||
BREACH OF CONTRACT OR TORTIOUS CONDUCT, INCLUDING NEGLIGENCE, EVEN IF ELASTIC
|
||||
HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
|
||||
|
||||
5. MISCELLANEOUS
|
||||
|
||||
This Agreement completely and exclusively states the entire agreement of the
|
||||
parties regarding the subject matter herein, and it supersedes, and its terms
|
||||
govern, all prior proposals, agreements, or other communications between the
|
||||
parties, oral or written, regarding such subject matter. This Agreement may be
|
||||
modified by Elastic from time to time, and any such modifications will be
|
||||
effective upon the "Posted Date" set forth at the top of the modified
|
||||
Agreement. If any provision hereof is held unenforceable, this Agreement will
|
||||
continue without said provision and be interpreted to reflect the original
|
||||
intent of the parties. This Agreement and any non-contractual obligation
|
||||
arising out of or in connection with it, is governed exclusively by Dutch law.
|
||||
This Agreement shall not be governed by the 1980 UN Convention on Contracts
|
||||
for the International Sale of Goods. All disputes arising out of or in
|
||||
connection with this Agreement, including its existence and validity, shall be
|
||||
resolved by the courts with jurisdiction in Amsterdam, The Netherlands, except
|
||||
where mandatory law provides for the courts at another location in The
|
||||
Netherlands to have jurisdiction. The parties hereby irrevocably waive any and
|
||||
all claims and defenses either might otherwise have in any such action or
|
||||
proceeding in any of such courts based upon any alleged lack of personal
|
||||
jurisdiction, improper venue, forum non conveniens or any similar claim or
|
||||
defense. A breach or threatened breach, by You of Section 2 may cause
|
||||
irreparable harm for which damages at law may not provide adequate relief, and
|
||||
therefore Elastic shall be entitled to seek injunctive relief without being
|
||||
required to post a bond. You may not assign this Agreement (including by
|
||||
operation of law in connection with a merger or acquisition), in whole or in
|
||||
part to any third party without the prior written consent of Elastic, which
|
||||
may be withheld or granted by Elastic in its sole and absolute discretion.
|
||||
Any assignment in violation of the preceding sentence is void. Notices to
|
||||
Elastic may also be sent to legal@elastic.co.
|
||||
|
||||
6. DEFINITIONS
|
||||
|
||||
The following terms have the meanings ascribed:
|
||||
|
||||
6.1 "Affiliate" means, with respect to a party, any entity that controls, is
|
||||
controlled by, or which is under common control with, such party, where
|
||||
"control" means ownership of at least fifty percent (50%) of the outstanding
|
||||
voting shares of the entity, or the contractual right to establish policy for,
|
||||
and manage the operations of, the entity.
|
||||
|
||||
6.2 "Basic Features and Functions" means those features and functions of the
|
||||
Elastic Software that are eligible for use under a Basic license, as set forth
|
||||
at https://www.elastic.co/subscriptions, as may be modified by Elastic from
|
||||
time to time.
|
||||
|
||||
6.3 "Commercial Software" means the Elastic Software Source Code in any file
|
||||
containing a header stating the contents are subject to the Elastic License or
|
||||
which is contained in the repository folder labeled "x-pack", unless a LICENSE
|
||||
file present in the directory subtree declares a different license.
|
||||
|
||||
6.4 "Derivative Work of the Commercial Software" means, for purposes of this
|
||||
Agreement, any modification(s) or enhancement(s) to the Commercial Software,
|
||||
which represent, as a whole, an original work of authorship.
|
||||
|
||||
6.5 "License" means a limited, non-exclusive, non-transferable, fully paid up,
|
||||
royalty free, right and license, without the right to grant or authorize
|
||||
sublicenses, solely for Your internal business operations to (i) install and
|
||||
use the applicable Features and Functions of the Elastic Software in Object
|
||||
Code, and (ii) permit Contractors and Your Affiliates to use the Elastic
|
||||
software as set forth in (i) above, provided that such use by Contractors must
|
||||
be solely for Your benefit and/or the benefit of Your Affiliates, and You
|
||||
shall be responsible for all acts and omissions of such Contractors and
|
||||
Affiliates in connection with their use of the Elastic software that are
|
||||
contrary to the terms and conditions of this Agreement.
|
||||
|
||||
6.6 "License Key" means a sequence of bytes, including but not limited to a
|
||||
JSON blob, that is used to enable certain features and functions of the
|
||||
Elastic Software.
|
||||
|
||||
6.7 "Marks and Notices" means all Elastic trademarks, trade names, logos and
|
||||
notices present on the Documentation as originally provided by Elastic.
|
||||
|
||||
6.8 "Non-production Environment" means an environment for development, testing
|
||||
or quality assurance, where software is not used for production purposes.
|
||||
|
||||
6.9 "Object Code" means any form resulting from mechanical transformation or
|
||||
translation of Source Code form, including but not limited to compiled object
|
||||
code, generated documentation, and conversions to other media types.
|
||||
|
||||
6.10 "Source Code" means the preferred form of computer software for making
|
||||
modifications, including but not limited to software source code,
|
||||
documentation source, and configuration files.
|
||||
|
||||
6.11 "Subscription" means the right to receive Support Services and a License
|
||||
to the Commercial Software.
|
|
@ -0,0 +1,2 @@
|
|||
Elasticsearch X-Pack
|
||||
Copyright 2009-2017 Elasticsearch
|
|
@ -0,0 +1,120 @@
|
|||
= Elasticsearch X-Pack
|
||||
|
||||
A set of Elastic's commercial plugins for Elasticsearch:
|
||||
|
||||
- License
|
||||
- Security
|
||||
- Watcher
|
||||
- Monitoring
|
||||
- Machine Learning
|
||||
- Graph
|
||||
|
||||
= Setup
|
||||
|
||||
You must check out `x-pack-elasticsearch` and `elasticsearch` with a specific directory structure. The
|
||||
`elasticsearch` checkout will be used when building `x-pack-elasticsearch`. The structure is:
|
||||
|
||||
- /path/to/elastic/elasticsearch
|
||||
- /path/to/elastic/elasticsearch-extra/x-pack-elasticsearch
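
For example, a fresh checkout that produces this layout might look like the
following sketch (the `/path/to/elastic` prefix and the clone URLs are
illustrative; use whatever workspace and remotes you normally use):

[source, txt]
-----
cd /path/to/elastic
git clone https://github.com/elastic/elasticsearch.git
mkdir elasticsearch-extra
cd elasticsearch-extra
git clone https://github.com/elastic/x-pack-elasticsearch.git
-----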
|
||||
|
||||
== Vault Secret
|
||||
|
||||
The build requires a Vault Secret ID. You can use a GitHub token by following these steps:
|
||||
|
||||
1. Go to https://github.com/settings/tokens
|
||||
2. Click *Generate new token*
|
||||
3. Set permissions to `read:org`
|
||||
4. Copy the token into `~/.elastic/github.token`
|
||||
5. Set the token's file permissions to `600`
|
||||
|
||||
```
|
||||
$ mkdir ~/.elastic
|
||||
$ vi ~/.elastic/github.token
|
||||
# Add your_token exactly as it is into the file and save it
|
||||
$ chmod 600 ~/.elastic/github.token
|
||||
```
|
||||
|
||||
If you do not create the token, you will see a failure along these lines when trying to build X-Pack:
|
||||
|
||||
```
|
||||
* What went wrong:
|
||||
Missing ~/.elastic/github.token file or VAULT_SECRET_ID environment variable, needed to authenticate with vault for secrets
|
||||
```
|
||||
|
||||
=== Offline Mode
|
||||
|
||||
When running the build in offline mode (`--offline`), the Vault secret setup is not required.
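
For example, the offline flag can be combined with any of the Gradle
invocations shown later in this document (illustrative):

[source, txt]
-----
gradle --offline clean check
-----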
|
||||
|
||||
== Native Code
|
||||
|
||||
**This is mandatory as tests depend on it**
|
||||
|
||||
Machine Learning requires platform-specific binaries, built from https://github.com/elastic/ml-cpp via CI servers.
|
||||
|
||||
= Build
|
||||
|
||||
- Run unit tests:
|
||||
+
|
||||
[source, txt]
|
||||
-----
|
||||
gradle clean test
|
||||
-----
|
||||
|
||||
- Run all tests:
|
||||
+
|
||||
[source, txt]
|
||||
-----
|
||||
gradle clean check
|
||||
-----
|
||||
|
||||
- Run integration tests:
|
||||
+
|
||||
[source, txt]
|
||||
-----
|
||||
gradle clean integTest
|
||||
-----
|
||||
|
||||
- Package X-Pack (without running tests)
|
||||
+
|
||||
[source, txt]
|
||||
-----
|
||||
gradle clean assemble
|
||||
-----
|
||||
|
||||
- Install X-Pack (without running tests)
|
||||
+
|
||||
[source, txt]
|
||||
-----
|
||||
gradle clean install
|
||||
-----
|
||||
|
||||
= Building documentation
|
||||
|
||||
The source files in this repository can be included in either the X-Pack
|
||||
Reference or the Elasticsearch Reference.
|
||||
|
||||
NOTE: In 5.4 and later, the Elasticsearch Reference includes X-Pack-specific
|
||||
content that is pulled from this repo.
|
||||
|
||||
To build the Elasticsearch Reference on your local machine, use the `docbldes`
|
||||
or `docbldesx` build commands defined in
|
||||
https://github.com/elastic/docs/blob/master/doc_build_aliases.sh
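
As a rough sketch (assuming a local checkout of the `elastic/docs` repository
and that the aliases file can be sourced into a bash shell):

[source, txt]
-----
git clone https://github.com/elastic/docs.git
source docs/doc_build_aliases.sh
docbldesx
-----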
|
||||
|
||||
== Adding Images
|
||||
|
||||
When you include an image in the documentation, specify the path relative to the
|
||||
location of the asciidoc file. By convention, we put images in an `images`
|
||||
subdirectory.
|
||||
|
||||
For example, to insert `watcher-ui-edit-watch.png` in `watcher/limitations.asciidoc`:
|
||||
|
||||
. Add an `images` subdirectory to the watcher directory if it doesn't already exist.
|
||||
. In `limitations.asciidoc`, specify:
|
||||
+
|
||||
[source, txt]
|
||||
-----
|
||||
image::images/watcher-ui-edit-watch.png["Editing a watch"]
|
||||
-----
|
||||
|
||||
Please note that image names and anchor IDs must be unique within the book, so
|
||||
do not use generic identifiers.
|
|
@ -0,0 +1,86 @@
|
|||
import org.elasticsearch.gradle.BuildPlugin
|
||||
import org.elasticsearch.gradle.plugin.PluginBuildPlugin
|
||||
import org.elasticsearch.gradle.Version
|
||||
import org.elasticsearch.gradle.precommit.LicenseHeadersTask
|
||||
|
||||
if (project.projectDir.name != 'x-pack-elasticsearch') {
|
||||
throw new GradleException('You must checkout x-pack-elasticsearch in the following directory: <path to Elasticsearch checkout>/../elasticsearch-extra/x-pack-elasticsearch')
|
||||
}
|
||||
|
||||
task wrapper(type: Wrapper)
|
||||
|
||||
Project xpackRootProject = project
|
||||
|
||||
subprojects {
|
||||
group = 'org.elasticsearch.plugin'
|
||||
ext.xpackRootProject = xpackRootProject
|
||||
ext.xpackProject = { String projectName -> xpackRootProject.project(projectName) }
|
||||
// helper method to find the path to a module
|
||||
ext.xpackModule = { String moduleName -> xpackProject("plugin:${moduleName}").path }
|
||||
|
||||
plugins.withType(MavenPublishPlugin).whenPluginAdded {
|
||||
publishing {
|
||||
publications {
|
||||
// add license information to generated poms
|
||||
all {
|
||||
pom.withXml { XmlProvider xml ->
|
||||
Node node = xml.asNode()
|
||||
|
||||
Node license = node.appendNode('licenses').appendNode('license')
|
||||
license.appendNode('name', 'Elastic Commercial Software End User License Agreement')
|
||||
license.appendNode('url', 'https://www.elastic.co/eula/')
|
||||
license.appendNode('distribution', 'repo')
|
||||
|
||||
Node developer = node.appendNode('developers').appendNode('developer')
|
||||
developer.appendNode('name', 'Elastic')
|
||||
developer.appendNode('url', 'http://www.elastic.co')
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
plugins.withType(BuildPlugin).whenPluginAdded {
|
||||
project.licenseFile = xpackRootProject.file('LICENSE.txt')
|
||||
project.noticeFile = xpackRootProject.file('NOTICE.txt')
|
||||
}
|
||||
|
||||
plugins.withType(PluginBuildPlugin).whenPluginAdded {
|
||||
project.esplugin.licenseFile = xpackRootProject.file('LICENSE.txt')
|
||||
project.esplugin.noticeFile = xpackRootProject.file('NOTICE.txt')
|
||||
}
|
||||
}
|
||||
|
||||
File checkstyleSuppressions = file('dev-tools/checkstyle_suppressions.xml')
|
||||
subprojects {
|
||||
tasks.withType(Checkstyle) {
|
||||
inputs.file(checkstyleSuppressions)
|
||||
// Use x-pack-elasticsearch specific suppressions file rather than the open source one.
|
||||
configProperties = [
|
||||
suppressions: checkstyleSuppressions
|
||||
]
|
||||
}
|
||||
|
||||
tasks.withType(LicenseHeadersTask.class) {
|
||||
approvedLicenses = ['Elasticsearch Confidential', 'Generated']
|
||||
additionalLicense 'ESCON', 'Elasticsearch Confidential', 'ELASTICSEARCH CONFIDENTIAL'
|
||||
}
|
||||
ext.projectSubstitutions += [ "org.elasticsearch.plugin:x-pack-core:${version}": xpackModule('core')]
|
||||
ext.projectSubstitutions += [ "org.elasticsearch.plugin:x-pack-deprecation:${version}": xpackModule('deprecation')]
|
||||
ext.projectSubstitutions += [ "org.elasticsearch.plugin:x-pack-graph:${version}": xpackModule('graph')]
|
||||
ext.projectSubstitutions += [ "org.elasticsearch.plugin:x-pack-logstash:${version}": xpackModule('logstash')]
|
||||
ext.projectSubstitutions += [ "org.elasticsearch.plugin:x-pack-ml:${version}": xpackModule('ml')]
|
||||
ext.projectSubstitutions += [ "org.elasticsearch.plugin:x-pack-monitoring:${version}": xpackModule('monitoring')]
|
||||
ext.projectSubstitutions += [ "org.elasticsearch.plugin:x-pack-security:${version}": xpackModule('security')]
|
||||
ext.projectSubstitutions += [ "org.elasticsearch.plugin:x-pack-upgrade:${version}": xpackModule('upgrade')]
|
||||
ext.projectSubstitutions += [ "org.elasticsearch.plugin:x-pack-watcher:${version}": xpackModule('watcher')]
|
||||
|
||||
bwcVersions.snapshotProjectNames.each { snapshotName ->
|
||||
Version snapshot = bwcVersions.getSnapshotForProject(snapshotName)
|
||||
if (snapshot != null && snapshot.onOrAfter("6.3.0")) {
|
||||
String snapshotProject = ":x-pack-elasticsearch:plugin:bwc:${snapshotName}"
|
||||
project(snapshotProject).ext.bwcVersion = snapshot
|
||||
ext.projectSubstitutions["org.elasticsearch.plugin:x-pack:${snapshot}"] = snapshotProject
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,10 @@
|
|||
File extrasDir = new File(settingsDir, '../..').getCanonicalFile()
|
||||
if (extrasDir.name.endsWith('-extra') == false) {
|
||||
throw new GradleException("x-pack-elasticsearch must be checked out under an elasticsearch-extra directory, found ${extrasDir.name}")
|
||||
}
|
||||
File elasticsearchDir = new File(extrasDir.parentFile, extrasDir.name[0..-7])
|
||||
if (elasticsearchDir.exists() == false) {
|
||||
throw new GradleException("${elasticsearchDir.name} is missing as a sibling to ${extrasDir.name}")
|
||||
}
|
||||
|
||||
project(':').projectDir = new File(elasticsearchDir, 'buildSrc')
|
|
@ -0,0 +1,29 @@
|
|||
<?xml version="1.0"?>
|
||||
<!DOCTYPE suppressions PUBLIC
|
||||
"-//Puppy Crawl//DTD Suppressions 1.1//EN"
|
||||
"http://www.puppycrawl.com/dtds/suppressions_1_1.dtd">
|
||||
|
||||
<suppressions>
|
||||
<!-- On Windows, Checkstyle matches files using \ path separator -->
|
||||
<!-- These files are generated by ANTLR so its silly to hold them to our rules. -->
|
||||
<suppress files="plugin[/\\]sql[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]xpack[/\\]sql[/\\]parser[/\\]SqlBase(Base(Listener|Visitor)|Lexer|Listener|Parser|Visitor).java" checks="." />
|
||||
|
||||
<suppress files="plugin[/\\]core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]xpack[/\\]ml[/\\]action[/\\]StopDatafeedAction.java" checks="LineLength" />
|
||||
<suppress files="plugin[/\\]ml[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]xpack[/\\]ml[/\\]utils[/\\]DomainSplitFunction.java" checks="LineLength" />
|
||||
<suppress files="plugin[/\\]core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]xpack[/\\]persistent[/\\]CompletionPersistentTaskAction.java" checks="LineLength" />
|
||||
<suppress files="plugin[/\\]security[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]xpack[/\\]security[/\\]Security.java" checks="LineLength" />
|
||||
<suppress files="plugin[/\\]security[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]xpack[/\\]security[/\\]authc[/\\]Realms.java" checks="LineLength" />
|
||||
<suppress files="plugin[/\\]security[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]xpack[/\\]security[/\\]authc[/\\]ldap[/\\]ActiveDirectorySIDUtil.java" checks="LineLength" />
|
||||
<suppress files="plugin[/\\]ml[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]xpack[/\\]ml[/\\]integration[/\\]TooManyJobsIT.java" checks="LineLength" />
|
||||
<suppress files="plugin[/\\]core[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]xpack[/\\]persistent[/\\]TestPersistentTasksPlugin.java" checks="LineLength" />
|
||||
<suppress files="plugin[/\\]security[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]xpack[/\\]security[/\\]action[/\\]user[/\\]TransportGetUsersActionTests.java" checks="LineLength" />
|
||||
<suppress files="plugin[/\\]security[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]xpack[/\\]security[/\\]authc[/\\]file[/\\]FileRealmTests.java" checks="LineLength" />
|
||||
<suppress files="plugin[/\\]security[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]xpack[/\\]security[/\\]authc[/\\]ldap[/\\]ActiveDirectoryRealmTests.java" checks="LineLength" />
|
||||
<suppress files="plugin[/\\]security[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]xpack[/\\]security[/\\]authc[/\\]ldap[/\\]ActiveDirectorySessionFactoryTests.java" checks="LineLength" />
|
||||
<suppress files="plugin[/\\]security[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]xpack[/\\]security[/\\]authc[/\\]ldap[/\\]LdapRealmTests.java" checks="LineLength" />
|
||||
<suppress files="plugin[/\\]security[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]xpack[/\\]security[/\\]authc[/\\]ldap[/\\]LdapSessionFactoryTests.java" checks="LineLength" />
|
||||
<suppress files="plugin[/\\]security[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]xpack[/\\]security[/\\]authc[/\\]ldap[/\\]LdapUserSearchSessionFactoryTests.java" checks="LineLength" />
|
||||
<suppress files="plugin[/\\]security[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]xpack[/\\]security[/\\]authc[/\\]ldap[/\\]support[/\\]SessionFactoryTests.java" checks="LineLength" />
|
||||
<suppress files="plugin[/\\]security[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]xpack[/\\]security[/\\]authc[/\\]pki[/\\]PkiRealmTests.java" checks="LineLength" />
|
||||
<suppress files="qa[/\\]security-example-extension[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]example[/\\]realm[/\\]CustomRealmTests.java" checks="LineLength" />
|
||||
</suppressions>
|
|
@ -0,0 +1,195 @@
|
|||
#!/bin/bash
|
||||
# This script is used as a single command to run the x-pack tests.
|
||||
#
|
||||
# It will attempt to check out 'elasticsearch' into a sibling directory
|
||||
# unless the environment variable `USE_EXISTING_ES` has a value. The
|
||||
# branch of elasticsearch which will be checked out depends on
|
||||
# environment variables. If running locally, set GIT_BRANCH. When
|
||||
# running in Jenkins, that env var is set. When running a PR
|
||||
# jenkins job, the variables PR_SOURCE_BRANCH and PR_TARGET_BRANCH
|
||||
# will be set and the source branch will be looked for in elasticsearch
|
||||
# before falling back to the target branch name.
|
||||
#
|
||||
# It will also attempt to install the appropriate version of node.js
|
||||
# for the Kibana plugin tests using nvm, unless
|
||||
# `xpack.kibana.build=false` is defined in
|
||||
# ~/.gradle/gradle.properties. Set a custom nvm directory using the
|
||||
# `NVM_DIR` environment variable.
|
||||
#
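# Example invocations (the script path and values below are illustrative):
#
#   GIT_BRANCH=origin/master ./ci-script check   # clone elasticsearch and run the default `check`
#   USE_EXISTING_ES=true ./ci-script intake      # reuse an existing elasticsearch sibling checkout
#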
|
||||
|
||||
# Turn on semi-strict mode
|
||||
set -e
|
||||
set -o pipefail
|
||||
|
||||
# Allow the user to choose a different test through a single CLI arg;
|
||||
# default to `check` if no argument has been supplied
|
||||
key=${1-check}
|
||||
case $key in
|
||||
intake)
|
||||
GRADLE_CLI_ARGS=(
|
||||
"--info"
|
||||
"compileJava"
|
||||
"compileTestJava"
|
||||
"precommit"
|
||||
"check"
|
||||
"-Dtests.network=true"
|
||||
"-Dtests.badapples=true"
|
||||
)
|
||||
;;
|
||||
packagingTest)
|
||||
GRADLE_CLI_ARGS=(
|
||||
"--info"
|
||||
"-Pvagrant.boxes=all"
|
||||
"packagingTest"
|
||||
)
|
||||
;;
|
||||
packagingTestSample)
|
||||
GRADLE_CLI_ARGS=(
|
||||
"--info"
|
||||
"-Pvagrant.boxes=sample"
|
||||
"packagingTest"
|
||||
)
|
||||
;;
|
||||
bwcTest)
|
||||
GRADLE_CLI_ARGS=(
|
||||
"--info"
|
||||
"bwcTest"
|
||||
)
|
||||
;;
|
||||
check)
|
||||
GRADLE_CLI_ARGS=(
|
||||
"--info"
|
||||
"check"
|
||||
"-Dtests.network=true"
|
||||
"-Dtests.badapples=true"
|
||||
)
|
||||
;;
|
||||
releaseTest)
|
||||
GRADLE_CLI_ARGS=(
|
||||
"--info"
|
||||
"check"
|
||||
"-Dtests.network=true"
|
||||
"-Dtests.badapples=true"
|
||||
"-Dbuild.snapshot=false"
|
||||
"-Dlicense.key=/etc/x-pack/license.key"
|
||||
"-Dtests.jvm.argline=-Dbuild.snapshot=false"
|
||||
)
|
||||
;;
|
||||
*)
|
||||
echo "Unsupported cli argument $1. Allowed arguments are packagingTest or check. No argument defaults to check."
|
||||
exit 1;;
|
||||
esac
|
||||
|
||||
SCRIPT="$0"
|
||||
|
||||
# SCRIPT may be an arbitrarily deep series of symlinks. Loop until we have the concrete path.
|
||||
while [ -h "$SCRIPT" ] ; do
|
||||
ls=$(ls -ld "$SCRIPT")
|
||||
# Drop everything prior to ->
|
||||
link=$(expr "$ls" : '.*-> \(.*\)$')
|
||||
if expr "$link" : '/.*' > /dev/null; then
|
||||
SCRIPT="$link"
|
||||
else
|
||||
SCRIPT=$(dirname "$SCRIPT")/"$link"
|
||||
fi
|
||||
done
|
||||
|
||||
# determine base directory
|
||||
BASE_DIR=$(dirname "$SCRIPT")/..
|
||||
|
||||
# make BASE_DIR absolute
|
||||
BASE_DIR=$(cd "$BASE_DIR"; pwd)
|
||||
|
||||
PARENT_DIR=$(cd "$BASE_DIR"/../..; pwd)
|
||||
|
||||
# go to the parent directory
|
||||
cd "$PARENT_DIR"
|
||||
|
||||
if [ -z ${USE_EXISTING_ES:+x} ]; then
|
||||
if [ -d "./elasticsearch" ]; then
|
||||
echo "I expected a clean workspace but an 'elasticsearch' sibling directory already exists in [$PARENT_DIR]!"
|
||||
echo
|
||||
echo "Either define 'USE_EXISTING_ES' or remove the existing 'elasticsearch' sibling."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
function pick_clone_target {
|
||||
echo "picking which branch of elasticsearch to clone"
|
||||
|
||||
# PR_* are provided by the CI git plugin for pull requests
|
||||
if [[ -n "$PR_AUTHOR" && -n "$PR_SOURCE_BRANCH" ]]; then
|
||||
GH_USER="$PR_AUTHOR"
|
||||
BRANCH="$PR_SOURCE_BRANCH"
|
||||
echo " -> using pull request author $GH_USER and branch $BRANCH"
|
||||
if [[ -n "$(git ls-remote --heads https://github.com/$GH_USER/elasticsearch.git $BRANCH 2>/dev/null)" ]]; then
|
||||
return
|
||||
fi
|
||||
fi
|
||||
GH_USER="elastic"
|
||||
# GIT_BRANCH is provided by normal CI runs. It starts with the repo, i.e., origin/master
|
||||
# If we are not in CI, we fall back to the master branch
|
||||
BRANCH="${PR_TARGET_BRANCH:-${GIT_BRANCH#*/}}"
|
||||
BRANCH="${BRANCH:-master}"
|
||||
echo " -> using CI branch $BRANCH from elastic repo"
|
||||
}
|
||||
|
||||
pick_clone_target
|
||||
|
||||
DEPTH=1
|
||||
if [ -n "$BUILD_METADATA" ]; then
|
||||
IFS=';' read -ra metadata <<< "$BUILD_METADATA"
|
||||
for kv in "${metadata[@]}"; do
|
||||
IFS='=' read -ra key_value <<< "$kv"
|
||||
if [ "${key_value[0]}" == "git_ref_elasticsearch" ]; then
|
||||
# Force checked out hash if build metadata is set. We use a depth of 100, which
|
||||
# assumes there are no more than 100 commits between head of the branch and
|
||||
# last-good-commit. This is still quite a bit faster than pulling the entire history.
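# Illustrative example of the metadata format this block expects:
#   BUILD_METADATA="git_ref_elasticsearch=<sha>;some_other_key=<value>"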
|
||||
ES_REF="${key_value[1]}"
|
||||
DEPTH=100
|
||||
fi
|
||||
done
|
||||
fi
|
||||
|
||||
echo " -> checking out '$BRANCH' branch from $GH_USER/elasticsearch..."
|
||||
git clone -b $BRANCH "https://github.com/$GH_USER/elasticsearch.git" --depth=$DEPTH
|
||||
|
||||
if [ -n "${ES_REF:-}" ]; then
|
||||
echo " -> using elasticsearch ref from build metadata: $ES_REF"
|
||||
git -C elasticsearch checkout $ES_REF
|
||||
else
|
||||
ES_REF="$(git -C elasticsearch rev-parse HEAD)"
|
||||
fi
|
||||
|
||||
echo " -> checked out elasticsearch revision: $ES_REF"
|
||||
echo
|
||||
|
||||
else
|
||||
if [ -d "./elasticsearch" ]; then
|
||||
echo "Using existing 'elasticsearch' checkout"
|
||||
else
|
||||
echo "You have defined 'USE_EXISTING_ES' but no existing Elasticsearch directory exists!"
|
||||
exit 2
|
||||
fi
|
||||
fi
|
||||
|
||||
# back to base directory
|
||||
cd "$BASE_DIR"
|
||||
|
||||
echo "Running x-pack-elasticsearch tests..."
|
||||
echo "Running in $PWD"
|
||||
|
||||
# output the commands
|
||||
set -xuf
|
||||
|
||||
# clean
|
||||
./gradlew --stacktrace clean -Dorg.gradle.java.home=${RUNTIME_JAVA_HOME:-$JAVA_HOME}
|
||||
|
||||
# Actually run the tests
|
||||
GRADLE_CLI_ARGS+=("-Dorg.gradle.java.home=${RUNTIME_JAVA_HOME:-$JAVA_HOME}")
|
||||
./gradlew "${GRADLE_CLI_ARGS[@]}"
|
||||
|
||||
# write the ES hash we checked out to build metadata
|
||||
mkdir build
|
||||
echo "git_ref_elasticsearch=$ES_REF" > build/build_metadata
|
||||
|
||||
# ~*~ shell-script-mode ~*~
|
|
@ -0,0 +1,184 @@
|
|||
#!/usr/bin/env perl
|
||||
|
||||
use strict;
|
||||
use warnings;
|
||||
|
||||
use HTTP::Tiny;
|
||||
use IO::Socket::SSL 1.52;
|
||||
use utf8;
|
||||
use Getopt::Long;
|
||||
|
||||
my $Base_URL = "https://api.github.com/repos/";
|
||||
my $User_Repo = 'elastic/x-pack-elasticsearch/';
|
||||
my $Issue_URL = "https://github.com/${User_Repo}issues";
|
||||
use JSON();
|
||||
use URI();
|
||||
use URI::Escape qw(uri_escape_utf8);
|
||||
|
||||
our $json = JSON->new->utf8(1);
|
||||
our $http = HTTP::Tiny->new(
|
||||
default_headers => {
|
||||
Accept => "application/vnd.github.v3+json",
|
||||
Authorization => load_github_key()
|
||||
}
|
||||
);
|
||||
|
||||
my %Opts = ( state => 'open' );
|
||||
|
||||
GetOptions(
|
||||
\%Opts, #
|
||||
'state=s', 'labels=s', 'add=s', 'remove=s'
|
||||
) || exit usage();
|
||||
|
||||
die usage('--state must be one of open|all|closed')
|
||||
unless $Opts{state} =~ /^(open|all|closed)$/;
|
||||
|
||||
die usage('--labels is required') unless $Opts{labels};
|
||||
die usage('Either --add or --remove is required')
|
||||
unless $Opts{add} || $Opts{remove};
|
||||
|
||||
relabel();
|
||||
|
||||
#===================================
|
||||
sub relabel {
|
||||
#===================================
|
||||
my @remove = split /,/, ( $Opts{remove} || '' );
|
||||
my @add = split /,/, ( $Opts{add} || '' );
|
||||
my $add_json = $json->encode( \@add );
|
||||
my $url = URI->new( $Base_URL . $User_Repo . 'issues' );
|
||||
$url->query_form(
|
||||
state => $Opts{state},
|
||||
labels => $Opts{labels},
|
||||
per_page => 100
|
||||
);
|
||||
|
||||
my $spool = Spool->new($url);
|
||||
while ( my $issue = $spool->next ) {
|
||||
my $id = $issue->{number};
|
||||
print "$Issue_URL/$id\n";
|
||||
if (@add) {
|
||||
add_label( $id, $add_json );
|
||||
}
|
||||
for (@remove) {
|
||||
remove_label( $id, $_ );
|
||||
}
|
||||
}
|
||||
print "Done\n";
|
||||
}
|
||||
|
||||
#===================================
|
||||
sub add_label {
|
||||
#===================================
|
||||
my ( $id, $json ) = @_;
|
||||
my $response = $http->post(
|
||||
$Base_URL . $User_Repo . "issues/$id/labels",
|
||||
{ content => $json,
|
||||
headers => { "Content-Type" => "application/json; charset=utf-8" }
|
||||
}
|
||||
);
|
||||
|
||||
die "$response->{status} $response->{reason}\n"
|
||||
unless $response->{success};
|
||||
|
||||
}
|
||||
|
||||
#===================================
|
||||
sub remove_label {
|
||||
#===================================
|
||||
my ( $id, $name ) = @_;
|
||||
my $url
|
||||
= $Base_URL
|
||||
. $User_Repo
|
||||
. "issues/$id/labels/"
|
||||
. uri_escape_utf8($name);
|
||||
my $response = $http->delete($url);
|
||||
|
||||
die "$response->{status} $response->{reason}\n"
|
||||
unless $response->{success};
|
||||
|
||||
}
|
||||
|
||||
#===================================
|
||||
sub load_github_key {
|
||||
#===================================
|
||||
my ($file) = glob("~/.github_auth");
|
||||
unless ( -e $file ) {
|
||||
warn "File ~/.github_auth doesn't exist - using anonymous API. "
|
||||
. "Generate a Personal Access Token at https://github.com/settings/applications\n";
|
||||
return '';
|
||||
}
|
||||
open my $fh, $file or die "Couldn't open $file: $!";
|
||||
my ($key) = <$fh> || die "Couldn't read $file: $!";
|
||||
$key =~ s/^\s+//;
|
||||
$key =~ s/\s+$//;
|
||||
die "Invalid GitHub key: $key"
|
||||
unless $key =~ /^[0-9a-f]{40}$/;
|
||||
return "token $key";
|
||||
|
||||
}
|
||||
|
||||
#===================================
|
||||
sub usage {
|
||||
#===================================
|
||||
my $msg = shift || '';
|
||||
|
||||
if ($msg) {
|
||||
$msg = "\nERROR: $msg\n\n";
|
||||
}
|
||||
return $msg . <<"USAGE";
|
||||
$0 --state=open|closed|all --labels=foo,bar --add=new1,new2 --remove=old1,old2
|
||||
|
||||
USAGE
|
||||
|
||||
}
|
||||
|
||||
package Spool;
|
||||
|
||||
use strict;
|
||||
use warnings;
|
||||
|
||||
#===================================
|
||||
sub new {
|
||||
#===================================
|
||||
my $class = shift;
|
||||
my $url = shift;
|
||||
return bless {
|
||||
url => $url,
|
||||
buffer => []
|
||||
},
|
||||
$class;
|
||||
}
|
||||
|
||||
#===================================
|
||||
sub next {
|
||||
#===================================
|
||||
my $self = shift;
|
||||
if ( @{ $self->{buffer} } == 0 ) {
|
||||
$self->refill;
|
||||
}
|
||||
return shift @{ $self->{buffer} };
|
||||
}
|
||||
|
||||
#===================================
|
||||
sub refill {
|
||||
#===================================
|
||||
my $self = shift;
|
||||
return unless $self->{url};
|
||||
my $response = $http->get( $self->{url} );
|
||||
die "$response->{status} $response->{reason}\n"
|
||||
unless $response->{success};
|
||||
|
||||
$self->{url} = '';
|
||||
|
||||
if ( my $link = $response->{headers}{link} ) {
|
||||
my @links = ref $link eq 'ARRAY' ? @$link : $link;
|
||||
for my $l (@links) {
|
||||
next unless $l =~ /<([^>]+)>; rel="next"/;
|
||||
$self->{url} = $1;
|
||||
last;
|
||||
}
|
||||
}
|
||||
|
||||
push @{ $self->{buffer} }, @{ $json->decode( $response->{content} ) };
|
||||
|
||||
}
|
|
@ -0,0 +1,200 @@
|
|||
# Smoke-tests an x-pack release candidate
|
||||
#
|
||||
# 1. Downloads the zip file from the staging URL
|
||||
# 2. Installs the x-pack plugin
|
||||
# 3. Starts one node for the zip package and checks:
|
||||
# -- if x-pack plugin is loaded
|
||||
# -- checks the xpack info page to verify the response returns the correct version and feature set info
|
||||
#
|
||||
# USAGE:
|
||||
#
|
||||
# python3 -B ./dev-tools/smoke_test_rc.py --version 5.0.0-beta1 --hash bfa3e47
|
||||
#
|
||||
|
||||
import argparse
|
||||
import tempfile
|
||||
import os
|
||||
import signal
|
||||
import shutil
|
||||
import urllib
|
||||
import urllib.request
|
||||
import time
|
||||
import json
|
||||
import base64
|
||||
from http.client import HTTPConnection
|
||||
|
||||
# in case of debug, uncomment
|
||||
# HTTPConnection.debuglevel = 4
|
||||
|
||||
try:
|
||||
JAVA_HOME = os.environ['JAVA_HOME']
|
||||
except KeyError:
|
||||
raise RuntimeError("""
|
||||
Please set JAVA_HOME in the env before running the release tool
|
||||
On OSX use: export JAVA_HOME=`/usr/libexec/java_home -v '1.8*'`""")
|
||||
|
||||
def java_exe():
|
||||
path = JAVA_HOME
|
||||
return 'export JAVA_HOME="%s" PATH="%s/bin:$PATH" JAVACMD="%s/bin/java"' % (path, path, path)
|
||||
|
||||
def verify_java_version(version):
|
||||
s = os.popen('%s; java -version 2>&1' % java_exe()).read()
|
||||
if ' version "%s.' % version not in s:
|
||||
raise RuntimeError('got wrong version for java %s:\n%s' % (version, s))
|
||||
|
||||
def read_fully(file):
|
||||
with open(file, encoding='utf-8') as f:
|
||||
return f.read()
|
||||
|
||||
def wait_for_node_startup(es_dir, timeout=60, headers={}):
|
||||
print(' Waiting until node becomes available for at most %s seconds' % timeout)
|
||||
for _ in range(timeout):
|
||||
conn = None
|
||||
try:
|
||||
time.sleep(1)
|
||||
host = get_host_from_ports_file(es_dir)
|
||||
conn = HTTPConnection(host, timeout=1)
|
||||
conn.request('GET', '/', headers=headers)
|
||||
res = conn.getresponse()
|
||||
if res.status == 200:
|
||||
return True
|
||||
except IOError as e:
|
||||
pass
|
||||
# that is ok, it might not be there yet
|
||||
finally:
|
||||
if conn:
|
||||
conn.close()
|
||||
return False
|
||||
|
||||
def download_release(version, release_hash, url):
|
||||
print('Downloading release %s from %s' % (version, url))
|
||||
tmp_dir = tempfile.mkdtemp()
|
||||
try:
|
||||
downloaded_files = []
|
||||
print(' ' + '*' * 80)
|
||||
print(' Downloading %s' % (url))
|
||||
file = ('elasticsearch-%s.zip' % version)
|
||||
artifact_path = os.path.join(tmp_dir, file)
|
||||
downloaded_files.append(artifact_path)
|
||||
urllib.request.urlretrieve(url, os.path.join(tmp_dir, file))
|
||||
print(' ' + '*' * 80)
|
||||
print()
|
||||
|
||||
smoke_test_release(version, downloaded_files, release_hash)
|
||||
print(' SUCCESS')
|
||||
finally:
|
||||
shutil.rmtree(tmp_dir)
|
||||
|
||||
def get_host_from_ports_file(es_dir):
|
||||
return read_fully(os.path.join(es_dir, 'logs/http.ports')).splitlines()[0]
|
||||
|
||||
def smoke_test_release(release, files, release_hash):
|
||||
for release_file in files:
|
||||
if not os.path.isfile(release_file):
|
||||
raise RuntimeError('Smoketest failed missing file %s' % (release_file))
|
||||
tmp_dir = tempfile.mkdtemp()
|
||||
run('unzip %s -d %s' % (release_file, tmp_dir))
|
||||
|
||||
es_dir = os.path.join(tmp_dir, 'elasticsearch-%s' % (release))
|
||||
es_run_path = os.path.join(es_dir, 'bin/elasticsearch')
|
||||
|
||||
print(' Smoke testing package [%s]' % release_file)
|
||||
es_plugin_path = os.path.join(es_dir, 'bin/elasticsearch-plugin')
|
||||
|
||||
print(' Install xpack [%s]' % es_plugin_path)
|
||||
run('%s; ES_JAVA_OPTS="-Des.plugins.staging=%s" %s install -b x-pack' % (java_exe(), release_hash, es_plugin_path))
|
||||
headers = { 'Authorization' : 'Basic %s' % base64.b64encode(b"es_admin:foobar").decode("UTF-8") }
|
||||
es_shield_path = os.path.join(es_dir, 'bin/x-pack/users')
|
||||
|
||||
print(" Install dummy shield user")
|
||||
run('%s; %s useradd es_admin -r superuser -p foobar' % (java_exe(), es_shield_path))
|
||||
|
||||
print(' Starting elasticsearch daemon from [%s]' % es_dir)
|
||||
try:
|
||||
run('%s; %s -Enode.name=smoke_tester -Ecluster.name=prepare_release -Erepositories.url.allowed_urls=http://snapshot.test* %s -Epidfile=%s -Enode.portsfile=true'
|
||||
% (java_exe(), es_run_path, '-d', os.path.join(es_dir, 'es-smoke.pid')))
|
||||
if not wait_for_node_startup(es_dir, headers=headers):
|
||||
print("elasticsearch logs:")
|
||||
print('*' * 80)
|
||||
logs = read_fully(os.path.join(es_dir, 'logs/prepare_release.log'))
|
||||
print(logs)
|
||||
print('*' * 80)
|
||||
raise RuntimeError('server didn\'t start up')
|
||||
try: # we now get / and /_nodes to fetch basic infos like hashes etc and the installed plugins
|
||||
host = get_host_from_ports_file(es_dir)
|
||||
conn = HTTPConnection(host, timeout=20)
|
||||
|
||||
# check if plugin is loaded
|
||||
conn.request('GET', '/_nodes/plugins?pretty=true', headers=headers)
|
||||
res = conn.getresponse()
|
||||
if res.status == 200:
|
||||
nodes = json.loads(res.read().decode("utf-8"))['nodes']
|
||||
for _, node in nodes.items():
|
||||
node_plugins = node['plugins']
|
||||
for node_plugin in node_plugins:
|
||||
if node_plugin['name'] != 'x-pack':
|
||||
raise RuntimeError('Unexpected plugin %s, expected x-pack only' % node_plugin['name'])
|
||||
else:
|
||||
raise RuntimeError('Expected HTTP 200 but got %s' % res.status)
|
||||
|
||||
# check if license is the default one
|
||||
# also sleep for few more seconds, as the initial license generation might take some time
|
||||
time.sleep(5)
|
||||
conn.request('GET', '/_xpack', headers=headers)
|
||||
res = conn.getresponse()
|
||||
if res.status == 200:
|
||||
xpack = json.loads(res.read().decode("utf-8"))
|
||||
if xpack['license']['type'] != 'trial':
|
||||
raise RuntimeError('expected license type to be trial, was %s' % xpack['license']['type'])
|
||||
if xpack['license']['mode'] != 'trial':
|
||||
raise RuntimeError('expected license mode to be trial, was %s' % xpack['license']['mode'])
|
||||
if xpack['license']['status'] != 'active':
|
||||
raise RuntimeError('expected license status to be active, was %s' % xpack['license']['status'])
|
||||
else:
|
||||
raise RuntimeError('Expected HTTP 200 but got %s' % res.status)
|
||||
|
||||
finally:
|
||||
conn.close()
|
||||
finally:
|
||||
pid_path = os.path.join(es_dir, 'es-smoke.pid')
|
||||
if os.path.exists(pid_path): # try reading the pid and kill the node
|
||||
pid = int(read_fully(pid_path))
|
||||
os.kill(pid, signal.SIGKILL)
|
||||
shutil.rmtree(tmp_dir)
|
||||
print(' ' + '*' * 80)
|
||||
print()
|
||||
|
||||
# console colors
|
||||
COLOR_OK = '\033[92m'
|
||||
COLOR_END = '\033[0m'
|
||||
|
||||
def run(command, env_vars=None):
|
||||
if env_vars:
|
||||
for key, value in env_vars.items():
|
||||
os.putenv(key, value)
|
||||
print('*** Running: %s%s%s' % (COLOR_OK, command, COLOR_END))
|
||||
if os.system(command):
|
||||
raise RuntimeError(' FAILED: %s' % (command))
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser(description='Smoke tests a release candidate from the S3 staging repo')
|
||||
parser.add_argument('--version', '-v', dest='version', default=None,
|
||||
help='The Elasticsearch version to smoke-test', required=True)
|
||||
parser.add_argument('--hash', '-r', dest='hash', default=None, required=True,
|
||||
help='The sha1 short hash of the release git commit to smoketest')
|
||||
parser.add_argument('--fetch_url', '-u', dest='url', default=None,
|
||||
help='Fetch the release from the specified URL instead of the staging repo')
|
||||
parser.set_defaults(hash=None)
|
||||
parser.set_defaults(version=None)
|
||||
parser.set_defaults(url=None)
|
||||
args = parser.parse_args()
|
||||
version = args.version
|
||||
hash = args.hash
|
||||
url = args.url
|
||||
verify_java_version('1.8')
|
||||
if url:
|
||||
download_url = url
|
||||
else:
|
||||
download_url = 'https://staging.elastic.co/%s-%s/downloads/elasticsearch/elasticsearch-%s.zip' % (version, hash, version)
|
||||
download_release(version, hash, download_url)
|
||||
|
|
@ -0,0 +1,253 @@
|
|||
#!/usr/bin/env perl
|
||||
# Licensed to Elasticsearch under one or more contributor
|
||||
# license agreements. See the NOTICE file distributed with
|
||||
# this work for additional information regarding copyright
|
||||
# ownership. Elasticsearch licenses this file to you under
|
||||
# the Apache License, Version 2.0 (the "License"); you may
|
||||
# not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on
|
||||
# an 'AS IS' BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
# either express or implied. See the License for the specific
|
||||
# language governing permissions and limitations under the License.
|
||||
|
||||
use strict;
|
||||
use warnings;
|
||||
|
||||
use HTTP::Tiny 0.070;
|
||||
use IO::Socket::SSL 1.52;
|
||||
use utf8;
|
||||
|
||||
my $Github_Key = load_github_key();
|
||||
my $Base_URL = "https://${Github_Key}api.github.com/repos/";
|
||||
my $User_Repo = 'elastic/x-pack-elasticsearch/';
|
||||
my $Issue_URL = "http://github.com/${User_Repo}issues/";
|
||||
|
||||
my @Groups = (
|
||||
"breaking", "breaking-java", "deprecation", "feature",
|
||||
"enhancement", "bug", "regression", "upgrade", "non-issue", "build",
|
||||
"docs", "test"
|
||||
);
|
||||
my %Group_Labels = (
|
||||
breaking => 'Breaking changes',
|
||||
'breaking-java' => 'Breaking Java changes',
|
||||
build => 'Build',
|
||||
deprecation => 'Deprecations',
|
||||
docs => 'Docs',
|
||||
feature => 'New features',
|
||||
enhancement => 'Enhancements',
|
||||
bug => 'Bug fixes',
|
||||
regression => 'Regressions',
|
||||
test => 'Tests',
|
||||
upgrade => 'Upgrades',
|
||||
"non-issue" => 'Non-issue',
|
||||
other => 'NOT CLASSIFIED',
|
||||
);
|
||||
|
||||
use JSON();
|
||||
use Encode qw(encode_utf8);
|
||||
|
||||
my $json = JSON->new->utf8(1);
|
||||
|
||||
my %All_Labels = fetch_labels();
|
||||
|
||||
my $version = shift @ARGV
|
||||
or dump_labels();
|
||||
|
||||
dump_labels("Unknown version '$version'")
|
||||
unless $All_Labels{$version};
|
||||
|
||||
my $issues = fetch_issues($version);
|
||||
dump_issues( $version, $issues );
|
||||
|
||||
#===================================
|
||||
sub dump_issues {
|
||||
#===================================
|
||||
my $version = shift;
|
||||
my $issues = shift;
|
||||
|
||||
$version =~ s/v//;
|
||||
my ( $day, $month, $year ) = (gmtime)[ 3 .. 5 ];
|
||||
$month++;
|
||||
$year += 1900;
|
||||
|
||||
print <<"ASCIIDOC";
|
||||
:issue: https://github.com/${User_Repo}issues/
|
||||
:pull: https://github.com/${User_Repo}pull/
|
||||
|
||||
[[release-notes-$version]]
|
||||
== $version Release Notes
|
||||
|
||||
ASCIIDOC
|
||||
|
||||
for my $group ( @Groups, 'other' ) {
|
||||
my $group_issues = $issues->{$group} or next;
|
||||
print "[[$group-$version]]\n"
|
||||
. "[float]\n"
|
||||
. "=== $Group_Labels{$group}\n\n";
|
||||
|
||||
for my $header ( sort keys %$group_issues ) {
|
||||
my $header_issues = $group_issues->{$header};
|
||||
print( $header || 'HEADER MISSING', "::\n" );
|
||||
|
||||
for my $issue (@$header_issues) {
|
||||
my $title = $issue->{title};
|
||||
|
||||
if ( $issue->{state} eq 'open' ) {
|
||||
$title .= " [OPEN]";
|
||||
}
|
||||
unless ( $issue->{pull_request} ) {
|
||||
$title .= " [ISSUE]";
|
||||
}
|
||||
my $number = $issue->{number};
|
||||
|
||||
# print encode_utf8("* $title {pull}${number}[#${number}]");
|
||||
print encode_utf8("* $title");
|
||||
print "\n";
|
||||
print encode_utf8("// https://github.com/${User_Repo}pull/${number}[#${number}]");
|
||||
if ( my $related = $issue->{related_issues} ) {
|
||||
my %uniq = map { $_ => 1 } @$related;
|
||||
print keys %uniq > 1
|
||||
? " (issues: "
|
||||
: " (issue: ";
|
||||
# print join ", ", map {"{issue}${_}[#${_}]"}
|
||||
# print join ", ", map {"#${_}"}
|
||||
print join ", ", map {"https://github.com/${User_Repo}issues/${_}[#${_}]"}
|
||||
sort keys %uniq;
|
||||
print ")";
|
||||
}
|
||||
print "\n";
|
||||
}
|
||||
print "\n";
|
||||
}
|
||||
print "\n\n";
|
||||
}
|
||||
}
|
||||
|
||||
#===================================
|
||||
sub fetch_issues {
|
||||
#===================================
|
||||
my $version = shift;
|
||||
my @issues;
|
||||
my %seen;
|
||||
for my $state ( 'open', 'closed' ) {
|
||||
my $page = 1;
|
||||
while (1) {
|
||||
my $tranche
|
||||
= fetch( $User_Repo
|
||||
. 'issues?labels='
|
||||
. $version
|
||||
. '&pagesize=100&state='
|
||||
. $state
|
||||
. '&page='
|
||||
. $page )
|
||||
or die "Couldn't fetch issues for version '$version'";
|
||||
push @issues, @$tranche;
|
||||
|
||||
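            # Record issue numbers referenced in each PR body so they can be
            # reported as related issues and skipped as standalone entries later.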
for my $issue (@$tranche) {
|
||||
next unless $issue->{pull_request};
|
||||
for ( $issue->{body} =~ m{(?:#|${User_Repo}issues/)(\d+)}g ) {
|
||||
$seen{$_}++;
|
||||
push @{ $issue->{related_issues} }, $_;
|
||||
}
|
||||
}
|
||||
$page++;
|
||||
last unless @$tranche;
|
||||
}
|
||||
}
|
||||
|
||||
my %group;
|
||||
ISSUE:
|
||||
for my $issue (@issues) {
|
||||
next if $seen{ $issue->{number} } && !$issue->{pull_request};
|
||||
|
||||
# uncomment for including/excluding PRs already issued in other versions
|
||||
# next if grep {$_->{name}=~/^v2/} @{$issue->{labels}};
|
||||
my %labels = map { $_->{name} => 1 } @{ $issue->{labels} };
|
||||
my ($header) = map { substr( $_, 1 ) } grep {/^:/} sort keys %labels;
|
||||
$header ||= 'NOT CLASSIFIED';
|
||||
for (@Groups) {
|
||||
if ( $labels{$_} ) {
|
||||
push @{ $group{$_}{$header} }, $issue;
|
||||
next ISSUE;
|
||||
}
|
||||
}
|
||||
push @{ $group{other}{$header} }, $issue;
|
||||
}
|
||||
|
||||
return \%group;
|
||||
}
|
||||
|
||||
#===================================
|
||||
sub fetch_labels {
|
||||
#===================================
|
||||
my %all;
|
||||
my $page = 1;
|
||||
while (1) {
|
||||
my $labels = fetch( $User_Repo . 'labels?page=' . $page++ )
|
||||
or die "Couldn't retrieve version labels";
|
||||
last unless @$labels;
|
||||
for (@$labels) {
|
||||
my $name = $_->{name};
|
||||
next unless $name =~ /^v/;
|
||||
$all{$name} = 1;
|
||||
}
|
||||
}
|
||||
return %all;
|
||||
}
|
||||
|
||||
#===================================
|
||||
sub fetch {
|
||||
#===================================
|
||||
my $url = $Base_URL . shift();
|
||||
# print "$url\n";
|
||||
my $response = HTTP::Tiny->new->get($url);
|
||||
# use Data::Dumper;
|
||||
# print Dumper($response);
|
||||
die "$response->{status} $response->{reason}\n"
|
||||
unless $response->{success};
|
||||
# print $response->{content};
|
||||
return $json->decode( $response->{content} );
|
||||
}
|
||||
|
||||
#===================================
|
||||
sub load_github_key {
|
||||
#===================================
|
||||
|
||||
my ($file) = glob("~/.github_auth");
|
||||
unless ( -e $file ) {
|
||||
warn "File ~/.github_auth doesn't exist - using anonymous API. "
|
||||
. "Generate a personal access token that has repo scope. See https://github.com/elastic/dev/blob/master/shared/development_process.md \n";
|
||||
return '';
|
||||
}
|
||||
open my $fh, $file or die "Couldn't open $file: $!";
|
||||
my ($key) = <$fh> || die "Couldn't read $file: $!";
|
||||
$key =~ s/^\s+//;
|
||||
$key =~ s/\s+$//;
|
||||
die "Invalid GitHub key: $key"
|
||||
unless $key =~ /^[0-9a-f]{40}$/;
|
||||
return "$key:x-oauth-basic@";
|
||||
|
||||
}
|
||||
|
||||
#===================================
|
||||
sub dump_labels {
|
||||
#===================================
|
||||
my $error = shift || '';
|
||||
if ($error) {
|
||||
$error = "\nERROR: $error\n";
|
||||
}
|
||||
my $labels = join( "\n - ", '', ( sort keys %All_Labels ) );
|
||||
die <<USAGE
|
||||
$error
|
||||
USAGE: $0 version > outfile
|
||||
|
||||
Known versions:$labels
|
||||
|
||||
USAGE
|
||||
|
||||
}
|
|
@ -0,0 +1,270 @@
|
|||
#!/usr/bin/env perl
|
||||
# Licensed to Elasticsearch under one or more contributor
|
||||
# license agreements. See the NOTICE file distributed with
|
||||
# this work for additional information regarding copyright
|
||||
# ownership. Elasticsearch licenses this file to you under
|
||||
# the Apache License, Version 2.0 (the "License"); you may
|
||||
# not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on
|
||||
# an 'AS IS' BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
# either express or implied. See the License for the specific
|
||||
# language governing permissions and limitations under the License.
|
||||
|
||||
use strict;
|
||||
use warnings;
|
||||
|
||||
use HTTP::Tiny 0.070;
|
||||
use IO::Socket::SSL 1.52;
|
||||
use utf8;
|
||||
|
||||
my $Github_Key = load_github_key();
|
||||
my $Base_URL = "https://${Github_Key}api.github.com/repos/";
|
||||
my $User_Repo1 = 'elastic/x-pack-elasticsearch/';
|
||||
my $Issue_URL1 = "http://github.com/${User_Repo1}issues/";
|
||||
my $User_Repo2 = 'elastic/machine-learning-cpp/';
|
||||
my $Issue_URL2 = "http://github.com/${User_Repo2}issues/";
|
||||
|
||||
my @Groups = (
|
||||
"breaking", "breaking-java", "deprecation", "feature",
|
||||
"enhancement", "bug", "regression", "upgrade", "non-issue", "build",
|
||||
"docs", "test"
|
||||
);
|
||||
my %Group_Labels = (
|
||||
breaking => 'Breaking changes',
|
||||
'breaking-java' => 'Breaking Java changes',
|
||||
build => 'Build',
|
||||
deprecation => 'Deprecations',
|
||||
docs => 'Docs',
|
||||
feature => 'New features',
|
||||
enhancement => 'Enhancements',
|
||||
bug => 'Bug fixes',
|
||||
regression => 'Regressions',
|
||||
test => 'Tests',
|
||||
upgrade => 'Upgrades',
|
||||
"non-issue" => 'Non-issue',
|
||||
other => 'NOT CLASSIFIED',
|
||||
);
|
||||
|
||||
use JSON();
|
||||
use Encode qw(encode_utf8);
|
||||
|
||||
my $json = JSON->new->utf8(1);
|
||||
|
||||
my %All_Labels1 = fetch_labels($User_Repo1);
|
||||
|
||||
my $version = shift @ARGV
|
||||
    or dump_labels(\%All_Labels1);
|
||||
|
||||
dump_labels(\%All_Labels1, "Unknown version '$version'")
|
||||
unless $All_Labels1{$version};
|
||||
|
||||
my $issues1 = fetch_issues($User_Repo1, $version);
|
||||
|
||||
# Repeat steps for second repo
|
||||
|
||||
my %All_Labels2 = fetch_labels($User_Repo2);
|
||||
|
||||
dump_labels(\%All_Labels2, "Unknown version '$version'")
|
||||
unless $All_Labels2{$version};
|
||||
|
||||
my $issues2 = fetch_issues($User_Repo2, $version);
|
||||
|
||||
dump_issues( $User_Repo1, $version, $issues1 );
|
||||
dump_issues( $User_Repo2, $version, $issues2 );
|
||||
|
||||
#===================================
|
||||
sub dump_issues {
|
||||
#===================================
|
||||
my $User_Repo = shift;
|
||||
my $version = shift;
|
||||
my $issues = shift;
|
||||
|
||||
$version =~ s/v//;
|
||||
my ( $day, $month, $year ) = (gmtime)[ 3 .. 5 ];
|
||||
$month++;
|
||||
$year += 1900;
|
||||
|
||||
print <<"ASCIIDOC";
|
||||
|
||||
[[release-notes-$version]]
|
||||
== X-Pack $version Release Notes
|
||||
|
||||
// Pulled from $User_Repo
|
||||
|
||||
ASCIIDOC
|
||||
|
||||
for my $group ( @Groups, 'other' ) {
|
||||
my $group_issues = $issues->{$group} or next;
|
||||
print "[[$group-$version]]\n"
|
||||
. "[float]\n"
|
||||
. "=== $Group_Labels{$group}\n\n";
|
||||
|
||||
for my $header ( sort keys %$group_issues ) {
|
||||
my $header_issues = $group_issues->{$header};
|
||||
print( $header || 'HEADER MISSING', "::\n" );
|
||||
|
||||
for my $issue (@$header_issues) {
|
||||
my $title = $issue->{title};
|
||||
|
||||
if ( $issue->{state} eq 'open' ) {
|
||||
$title .= " [OPEN]";
|
||||
}
|
||||
unless ( $issue->{pull_request} ) {
|
||||
$title .= " [ISSUE]";
|
||||
}
|
||||
my $number = $issue->{number};
|
||||
|
||||
# print encode_utf8("* $title {pull}${number}[#${number}]");
|
||||
print encode_utf8("* $title");
|
||||
print "\n";
|
||||
print encode_utf8("// https://github.com/${User_Repo}pull/${number}[#${number}]");
|
||||
if ( my $related = $issue->{related_issues} ) {
|
||||
my %uniq = map { $_ => 1 } @$related;
|
||||
print keys %uniq > 1
|
||||
? " (issues: "
|
||||
: " (issue: ";
|
||||
# print join ", ", map {"{issue}${_}[#${_}]"}
|
||||
# print join ", ", map {"#${_}"}
|
||||
print join ", ", map {"https://github.com/${User_Repo}issues/${_}[#${_}]"}
|
||||
sort keys %uniq;
|
||||
print ")";
|
||||
}
|
||||
print "\n";
|
||||
}
|
||||
print "\n";
|
||||
}
|
||||
print "\n\n";
|
||||
}
|
||||
}
|
||||
|
||||
#===================================
|
||||
sub fetch_issues {
|
||||
#===================================
|
||||
my $User_Repo = shift;
|
||||
my $version = shift;
|
||||
my @issues;
|
||||
my %seen;
|
||||
for my $state ( 'open', 'closed' ) {
|
||||
my $page = 1;
|
||||
while (1) {
|
||||
my $tranche
|
||||
= fetch( $User_Repo
|
||||
. 'issues?labels='
|
||||
. $version
|
||||
. '&pagesize=100&state='
|
||||
. $state
|
||||
. '&page='
|
||||
. $page )
|
||||
or die "Couldn't fetch issues for version '$version'";
|
||||
push @issues, @$tranche;
|
||||
|
||||
for my $issue (@$tranche) {
|
||||
next unless $issue->{pull_request};
|
||||
for ( $issue->{body} =~ m{(?:#|${User_Repo}issues/)(\d+)}g ) {
|
||||
$seen{$_}++;
|
||||
push @{ $issue->{related_issues} }, $_;
|
||||
}
|
||||
}
|
||||
$page++;
|
||||
last unless @$tranche;
|
||||
}
|
||||
}
|
||||
|
||||
my %group;
|
||||
ISSUE:
|
||||
for my $issue (@issues) {
|
||||
next if $seen{ $issue->{number} } && !$issue->{pull_request};
|
||||
|
||||
# uncomment for including/excluding PRs already issued in other versions
|
||||
# next if grep {$_->{name}=~/^v2/} @{$issue->{labels}};
|
||||
my %labels = map { $_->{name} => 1 } @{ $issue->{labels} };
|
||||
my ($header) = map { substr( $_, 1 ) } grep {/^:/} sort keys %labels;
|
||||
$header ||= 'NOT CLASSIFIED';
|
||||
for (@Groups) {
|
||||
if ( $labels{$_} ) {
|
||||
push @{ $group{$_}{$header} }, $issue;
|
||||
next ISSUE;
|
||||
}
|
||||
}
|
||||
push @{ $group{other}{$header} }, $issue;
|
||||
}
|
||||
|
||||
return \%group;
|
||||
}
|
||||
|
||||
#===================================
|
||||
sub fetch_labels {
|
||||
#===================================
|
||||
my $User_Repo = shift;
|
||||
my %all;
|
||||
my $page = 1;
|
||||
while (1) {
|
||||
my $labels = fetch( $User_Repo . 'labels?page=' . $page++ )
|
||||
or die "Couldn't retrieve version labels";
|
||||
last unless @$labels;
|
||||
for (@$labels) {
|
||||
my $name = $_->{name};
|
||||
next unless $name =~ /^v/;
|
||||
$all{$name} = 1;
|
||||
}
|
||||
}
|
||||
return %all;
|
||||
}
|
||||
|
||||
#===================================
|
||||
sub fetch {
|
||||
#===================================
|
||||
my $url = $Base_URL . shift();
|
||||
# print "$url\n";
|
||||
my $response = HTTP::Tiny->new->get($url);
|
||||
# use Data::Dumper;
|
||||
# print Dumper($response);
|
||||
die "$response->{status} $response->{reason}\n"
|
||||
unless $response->{success};
|
||||
# print $response->{content};
|
||||
return $json->decode( $response->{content} );
|
||||
}
|
||||
|
||||
#===================================
|
||||
sub load_github_key {
|
||||
#===================================
|
||||
|
||||
my ($file) = glob("~/.github_auth");
|
||||
unless ( -e $file ) {
|
||||
warn "File ~/.github_auth doesn't exist - using anonymous API. "
|
||||
. "Generate a personal access token that has repo scope. See https://github.com/elastic/dev/blob/master/shared/development_process.md \n";
|
||||
return '';
|
||||
}
|
||||
open my $fh, $file or die "Couldn't open $file: $!";
|
||||
my ($key) = <$fh> || die "Couldn't read $file: $!";
|
||||
$key =~ s/^\s+//;
|
||||
$key =~ s/\s+$//;
|
||||
die "Invalid GitHub key: $key"
|
||||
unless $key =~ /^[0-9a-f]{40}$/;
|
||||
return "$key:x-oauth-basic@";
|
||||
|
||||
}
|
||||
|
||||
#===================================
|
||||
sub dump_labels {
|
||||
#===================================
|
||||
    my $All_Labels = shift || {};
|
||||
my $error = shift || '';
|
||||
if ($error) {
|
||||
$error = "\nERROR: $error\n";
|
||||
}
|
||||
    my $labels = join( "\n - ", '', ( sort keys %$All_Labels ) );
|
||||
die <<USAGE
|
||||
$error
|
||||
USAGE: $0 version > outfile
|
||||
|
||||
Known versions:$labels
|
||||
|
||||
USAGE
|
||||
|
||||
}
|
|
@ -0,0 +1,683 @@
|
|||
import org.elasticsearch.gradle.test.NodeInfo
|
||||
|
||||
import java.nio.charset.StandardCharsets
|
||||
|
||||
apply plugin: 'elasticsearch.docs-test'
|
||||
|
||||
/* List of files that have snippets that probably should be converted to
|
||||
* `// CONSOLE` and `// TESTRESPONSE` but have yet to be converted. Try and
|
||||
* only remove entries from this list. When it is empty we'll remove it
|
||||
* entirely and have a party! There will be cake and everything.... */
|
||||
buildRestTests.expectedUnconvertedCandidates = [
|
||||
'en/ml/functions/count.asciidoc',
|
||||
'en/ml/functions/geo.asciidoc',
|
||||
'en/ml/functions/info.asciidoc',
|
||||
'en/ml/functions/metric.asciidoc',
|
||||
'en/ml/functions/rare.asciidoc',
|
||||
'en/ml/functions/sum.asciidoc',
|
||||
'en/ml/functions/time.asciidoc',
|
||||
'en/ml/aggregations.asciidoc',
|
||||
'en/ml/customurl.asciidoc',
|
||||
'en/monitoring/indices.asciidoc',
|
||||
'en/rest-api/security/ssl.asciidoc',
|
||||
'en/rest-api/security/users.asciidoc',
|
||||
'en/rest-api/security/tokens.asciidoc',
|
||||
'en/rest-api/watcher/put-watch.asciidoc',
|
||||
'en/security/authentication/user-cache.asciidoc',
|
||||
'en/security/authorization/field-and-document-access-control.asciidoc',
|
||||
'en/security/authorization/run-as-privilege.asciidoc',
|
||||
'en/security/ccs-clients-integrations/http.asciidoc',
|
||||
'en/security/authorization/custom-roles-provider.asciidoc',
|
||||
'en/watcher/actions/email.asciidoc',
|
||||
'en/watcher/actions/hipchat.asciidoc',
|
||||
'en/watcher/actions/index.asciidoc',
|
||||
'en/watcher/actions/logging.asciidoc',
|
||||
'en/watcher/actions/pagerduty.asciidoc',
|
||||
'en/watcher/actions/slack.asciidoc',
|
||||
'en/watcher/actions/jira.asciidoc',
|
||||
'en/watcher/actions/webhook.asciidoc',
|
||||
'en/watcher/condition/always.asciidoc',
|
||||
'en/watcher/condition/array-compare.asciidoc',
|
||||
'en/watcher/condition/compare.asciidoc',
|
||||
'en/watcher/condition/never.asciidoc',
|
||||
'en/watcher/condition/script.asciidoc',
|
||||
'en/watcher/customizing-watches.asciidoc',
|
||||
'en/watcher/example-watches/example-watch-meetupdata.asciidoc',
|
||||
'en/watcher/how-watcher-works.asciidoc',
|
||||
'en/watcher/input/chain.asciidoc',
|
||||
'en/watcher/input/http.asciidoc',
|
||||
'en/watcher/input/search.asciidoc',
|
||||
'en/watcher/input/simple.asciidoc',
|
||||
'en/watcher/transform.asciidoc',
|
||||
'en/watcher/transform/chain.asciidoc',
|
||||
'en/watcher/transform/script.asciidoc',
|
||||
'en/watcher/transform/search.asciidoc',
|
||||
'en/watcher/trigger/schedule/cron.asciidoc',
|
||||
'en/watcher/trigger/schedule/daily.asciidoc',
|
||||
'en/watcher/trigger/schedule/hourly.asciidoc',
|
||||
'en/watcher/trigger/schedule/interval.asciidoc',
|
||||
'en/watcher/trigger/schedule/monthly.asciidoc',
|
||||
'en/watcher/trigger/schedule/weekly.asciidoc',
|
||||
'en/watcher/trigger/schedule/yearly.asciidoc',
|
||||
'en/watcher/troubleshooting.asciidoc',
|
||||
'en/rest-api/license/delete-license.asciidoc',
|
||||
'en/rest-api/license/start-trial.asciidoc',
|
||||
'en/rest-api/license/update-license.asciidoc',
|
||||
'en/ml/api-quickref.asciidoc',
|
||||
'en/rest-api/ml/delete-calendar-event.asciidoc',
|
||||
'en/rest-api/ml/delete-snapshot.asciidoc',
|
||||
'en/rest-api/ml/forecast.asciidoc',
|
||||
'en/rest-api/ml/get-bucket.asciidoc',
|
||||
'en/rest-api/ml/get-job-stats.asciidoc',
|
||||
'en/rest-api/ml/get-overall-buckets.asciidoc',
|
||||
'en/rest-api/ml/get-category.asciidoc',
|
||||
'en/rest-api/ml/get-record.asciidoc',
|
||||
'en/rest-api/ml/get-influencer.asciidoc',
|
||||
'en/rest-api/ml/get-snapshot.asciidoc',
|
||||
'en/rest-api/ml/post-data.asciidoc',
|
||||
'en/rest-api/ml/preview-datafeed.asciidoc',
|
||||
'en/rest-api/ml/revert-snapshot.asciidoc',
|
||||
'en/rest-api/ml/update-snapshot.asciidoc',
|
||||
'en/rest-api/ml/validate-detector.asciidoc',
|
||||
'en/rest-api/ml/validate-job.asciidoc',
|
||||
'en/rest-api/security/authenticate.asciidoc',
|
||||
'en/rest-api/watcher/stats.asciidoc',
|
||||
'en/security/authorization.asciidoc',
|
||||
'en/watcher/example-watches/watching-time-series-data.asciidoc',
|
||||
]
|
||||
|
||||
dependencies {
|
||||
testCompile project(path: xpackModule('core'), configuration: 'runtime')
|
||||
testCompile project(path: xpackModule('core'), configuration: 'testArtifacts')
|
||||
testCompile project(path: xpackProject('plugin').path, configuration: 'testArtifacts')
|
||||
}
|
||||
|
||||
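// Custom wait condition: polls the cluster health endpoint with basic auth for the
// test_admin user (created by setupTestAdmin below), since security is enabled.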
Closure waitWithAuth = { NodeInfo node, AntBuilder ant ->
|
||||
File tmpFile = new File(node.cwd, 'wait.success')
|
||||
// wait up to twenty seconds
|
||||
final long stopTime = System.currentTimeMillis() + 20000L;
|
||||
Exception lastException = null;
|
||||
while (System.currentTimeMillis() < stopTime) {
|
||||
lastException = null;
|
||||
// we use custom wait logic here as the elastic user is not available immediately and ant.get will fail when a 401 is returned
|
||||
HttpURLConnection httpURLConnection = null;
|
||||
try {
|
||||
httpURLConnection = (HttpURLConnection) new URL("http://${node.httpUri()}/_cluster/health").openConnection();
|
||||
httpURLConnection.setRequestProperty("Authorization", "Basic " +
|
||||
Base64.getEncoder().encodeToString("test_admin:x-pack-test-password".getBytes(StandardCharsets.UTF_8)));
|
||||
httpURLConnection.setRequestMethod("GET");
|
||||
httpURLConnection.setConnectTimeout(1000);
|
||||
httpURLConnection.setReadTimeout(30000);
|
||||
httpURLConnection.connect();
|
||||
if (httpURLConnection.getResponseCode() == 200) {
|
||||
tmpFile.withWriter StandardCharsets.UTF_8.name(), {
|
||||
it.write(httpURLConnection.getInputStream().getText(StandardCharsets.UTF_8.name()))
|
||||
}
|
||||
break;
|
||||
}
|
||||
} catch (Exception e) {
|
||||
logger.debug("failed to call cluster health", e)
|
||||
lastException = e
|
||||
} finally {
|
||||
if (httpURLConnection != null) {
|
||||
httpURLConnection.disconnect();
|
||||
}
|
||||
}
|
||||
|
||||
// did not start, so wait a bit before trying again
|
||||
Thread.sleep(500L);
|
||||
}
|
||||
if (tmpFile.exists() == false && lastException != null) {
|
||||
logger.error("final attempt of calling cluster health failed", lastException)
|
||||
}
|
||||
return tmpFile.exists()
|
||||
}
|
||||
|
||||
integTestCluster {
|
||||
plugin xpackProject('plugin').path
|
||||
setting 'xpack.security.enabled', 'true'
|
||||
setting 'xpack.security.authc.token.enabled', 'true'
|
||||
// Disable monitoring exporters for the docs tests
|
||||
setting 'xpack.monitoring.exporters._local.type', 'local'
|
||||
setting 'xpack.monitoring.exporters._local.enabled', 'false'
|
||||
setting 'xpack.license.self_generated.type', 'trial'
|
||||
setupCommand 'setupTestAdmin',
|
||||
'bin/x-pack/users', 'useradd', 'test_admin', '-p', 'x-pack-test-password', '-r', 'superuser'
|
||||
waitCondition = waitWithAuth
|
||||
}
|
||||
|
||||
|
||||
|
||||
buildRestTests.docs = fileTree(projectDir) {
|
||||
// No snippets in here!
|
||||
exclude 'build.gradle'
|
||||
// That is where the snippets go, not where they come from!
|
||||
exclude 'build'
|
||||
  // These files simply don't pass yet. We should figure out how to fix them.
|
||||
exclude 'en/watcher/reference/actions.asciidoc'
|
||||
exclude 'en/rest-api/graph/explore.asciidoc'
|
||||
}
|
||||
|
||||
Map<String, String> setups = buildRestTests.setups
|
||||
setups['my_inactive_watch'] = '''
|
||||
- do:
|
||||
xpack.watcher.put_watch:
|
||||
id: "my_watch"
|
||||
active: false
|
||||
body: >
|
||||
{
|
||||
"trigger": {
|
||||
"schedule": {
|
||||
"hourly": {
|
||||
"minute": [ 0, 5 ]
|
||||
}
|
||||
}
|
||||
},
|
||||
"input": {
|
||||
"simple": {
|
||||
"payload": {
|
||||
"send": "yes"
|
||||
}
|
||||
}
|
||||
},
|
||||
"condition": {
|
||||
"always": {}
|
||||
},
|
||||
"actions": {
|
||||
"test_index": {
|
||||
"index": {
|
||||
"index": "test",
|
||||
"doc_type": "test2"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
- match: { _id: "my_watch" }
|
||||
'''
|
||||
setups['my_active_watch'] = setups['my_inactive_watch'].replace(
|
||||
'active: false', 'active: true')
|
||||
|
||||
// Used by SQL because it looks SQL-ish
|
||||
setups['library'] = '''
|
||||
- do:
|
||||
indices.create:
|
||||
index: library
|
||||
body:
|
||||
settings:
|
||||
number_of_shards: 1
|
||||
number_of_replicas: 1
|
||||
mappings:
|
||||
book:
|
||||
properties:
|
||||
name:
|
||||
type: text
|
||||
fields:
|
||||
keyword:
|
||||
type: keyword
|
||||
author:
|
||||
type: text
|
||||
fields:
|
||||
keyword:
|
||||
type: keyword
|
||||
release_date:
|
||||
type: date
|
||||
page_count:
|
||||
type: short
|
||||
- do:
|
||||
bulk:
|
||||
index: library
|
||||
type: book
|
||||
refresh: true
|
||||
body: |
|
||||
{"index":{"_id": "Leviathan Wakes"}}
|
||||
{"name": "Leviathan Wakes", "author": "James S.A. Corey", "release_date": "2011-06-02", "page_count": 561}
|
||||
{"index":{"_id": "Hyperion"}}
|
||||
{"name": "Hyperion", "author": "Dan Simmons", "release_date": "1989-05-26", "page_count": 482}
|
||||
{"index":{"_id": "Dune"}}
|
||||
{"name": "Dune", "author": "Frank Herbert", "release_date": "1965-06-01", "page_count": 604}
|
||||
{"index":{"_id": "Dune Messiah"}}
|
||||
{"name": "Dune Messiah", "author": "Frank Herbert", "release_date": "1969-10-15", "page_count": 331}
|
||||
{"index":{"_id": "Children of Dune"}}
|
||||
{"name": "Children of Dune", "author": "Frank Herbert", "release_date": "1976-04-21", "page_count": 408}
|
||||
{"index":{"_id": "God Emperor of Dune"}}
|
||||
{"name": "God Emperor of Dune", "author": "Frank Herbert", "release_date": "1981-05-28", "page_count": 454}
|
||||
{"index":{"_id": "Consider Phlebas"}}
|
||||
{"name": "Consider Phlebas", "author": "Iain M. Banks", "release_date": "1987-04-23", "page_count": 471}
|
||||
{"index":{"_id": "Pandora's Star"}}
|
||||
{"name": "Pandora's Star", "author": "Peter F. Hamilton", "release_date": "2004-03-02", "page_count": 768}
|
||||
{"index":{"_id": "Revelation Space"}}
|
||||
{"name": "Revelation Space", "author": "Alastair Reynolds", "release_date": "2000-03-15", "page_count": 585}
|
||||
{"index":{"_id": "A Fire Upon the Deep"}}
|
||||
{"name": "A Fire Upon the Deep", "author": "Vernor Vinge", "release_date": "1992-06-01", "page_count": 613}
|
||||
{"index":{"_id": "Ender's Game"}}
|
||||
{"name": "Ender's Game", "author": "Orson Scott Card", "release_date": "1985-06-01", "page_count": 324}
|
||||
{"index":{"_id": "1984"}}
|
||||
{"name": "1984", "author": "George Orwell", "release_date": "1985-06-01", "page_count": 328}
|
||||
{"index":{"_id": "Fahrenheit 451"}}
|
||||
{"name": "Fahrenheit 451", "author": "Ray Bradbury", "release_date": "1953-10-15", "page_count": 227}
|
||||
{"index":{"_id": "Brave New World"}}
|
||||
{"name": "Brave New World", "author": "Aldous Huxley", "release_date": "1932-06-01", "page_count": 268}
|
||||
{"index":{"_id": "Foundation"}}
|
||||
{"name": "Foundation", "author": "Isaac Asimov", "release_date": "1951-06-01", "page_count": 224}
|
||||
{"index":{"_id": "The Giver"}}
|
||||
{"name": "The Giver", "author": "Lois Lowry", "release_date": "1993-04-26", "page_count": 208}
|
||||
{"index":{"_id": "Slaughterhouse-Five"}}
|
||||
{"name": "Slaughterhouse-Five", "author": "Kurt Vonnegut", "release_date": "1969-06-01", "page_count": 275}
|
||||
{"index":{"_id": "The Hitchhiker's Guide to the Galaxy"}}
|
||||
{"name": "The Hitchhiker's Guide to the Galaxy", "author": "Douglas Adams", "release_date": "1979-10-12", "page_count": 180}
|
||||
{"index":{"_id": "Snow Crash"}}
|
||||
{"name": "Snow Crash", "author": "Neal Stephenson", "release_date": "1992-06-01", "page_count": 470}
|
||||
{"index":{"_id": "Neuromancer"}}
|
||||
{"name": "Neuromancer", "author": "William Gibson", "release_date": "1984-07-01", "page_count": 271}
|
||||
{"index":{"_id": "The Handmaid's Tale"}}
|
||||
{"name": "The Handmaid's Tale", "author": "Margaret Atwood", "release_date": "1985-06-01", "page_count": 311}
|
||||
{"index":{"_id": "Starship Troopers"}}
|
||||
{"name": "Starship Troopers", "author": "Robert A. Heinlein", "release_date": "1959-12-01", "page_count": 335}
|
||||
{"index":{"_id": "The Left Hand of Darkness"}}
|
||||
{"name": "The Left Hand of Darkness", "author": "Ursula K. Le Guin", "release_date": "1969-06-01", "page_count": 304}
|
||||
{"index":{"_id": "The Moon is a Harsh Mistress"}}
|
||||
{"name": "The Moon is a Harsh Mistress", "author": "Robert A. Heinlein", "release_date": "1966-04-01", "page_count": 288}
|
||||
|
||||
'''
|
||||
setups['server_metrics_index'] = '''
|
||||
- do:
|
||||
indices.create:
|
||||
index: server-metrics
|
||||
body:
|
||||
settings:
|
||||
number_of_shards: 1
|
||||
number_of_replicas: 0
|
||||
mappings:
|
||||
metric:
|
||||
properties:
|
||||
timestamp:
|
||||
type: date
|
||||
total:
|
||||
type: long
|
||||
'''
|
||||
setups['server_metrics_data'] = setups['server_metrics_index'] + '''
|
||||
- do:
|
||||
bulk:
|
||||
index: server-metrics
|
||||
type: metric
|
||||
refresh: true
|
||||
body: |
|
||||
{"index": {"_id":"1177"}}
|
||||
{"timestamp":"2017-03-23T13:00:00","total":40476}
|
||||
{"index": {"_id":"1178"}}
|
||||
{"timestamp":"2017-03-23T13:00:00","total":15287}
|
||||
{"index": {"_id":"1179"}}
|
||||
{"timestamp":"2017-03-23T13:00:00","total":-776}
|
||||
{"index": {"_id":"1180"}}
|
||||
{"timestamp":"2017-03-23T13:00:00","total":11366}
|
||||
{"index": {"_id":"1181"}}
|
||||
{"timestamp":"2017-03-23T13:00:00","total":3606}
|
||||
{"index": {"_id":"1182"}}
|
||||
{"timestamp":"2017-03-23T13:00:00","total":19006}
|
||||
{"index": {"_id":"1183"}}
|
||||
{"timestamp":"2017-03-23T13:00:00","total":38613}
|
||||
{"index": {"_id":"1184"}}
|
||||
{"timestamp":"2017-03-23T13:00:00","total":19516}
|
||||
{"index": {"_id":"1185"}}
|
||||
{"timestamp":"2017-03-23T13:00:00","total":-258}
|
||||
{"index": {"_id":"1186"}}
|
||||
{"timestamp":"2017-03-23T13:00:00","total":9551}
|
||||
{"index": {"_id":"1187"}}
|
||||
{"timestamp":"2017-03-23T13:00:00","total":11217}
|
||||
{"index": {"_id":"1188"}}
|
||||
{"timestamp":"2017-03-23T13:00:00","total":22557}
|
||||
{"index": {"_id":"1189"}}
|
||||
{"timestamp":"2017-03-23T13:00:00","total":40508}
|
||||
{"index": {"_id":"1190"}}
|
||||
{"timestamp":"2017-03-23T13:00:00","total":11887}
|
||||
{"index": {"_id":"1191"}}
|
||||
{"timestamp":"2017-03-23T13:00:00","total":31659}
|
||||
'''
|
||||
setups['server_metrics_job'] = setups['server_metrics_data'] + '''
|
||||
- do:
|
||||
xpack.ml.put_job:
|
||||
job_id: "total-requests"
|
||||
body: >
|
||||
{
|
||||
"description" : "Total sum of requests",
|
||||
"analysis_config" : {
|
||||
"bucket_span":"10m",
|
||||
"detectors" :[
|
||||
{
|
||||
"detector_description": "Sum of total",
|
||||
"function": "sum",
|
||||
"field_name": "total"
|
||||
}
|
||||
]},
|
||||
"data_description" : {
|
||||
"time_field":"timestamp",
|
||||
"time_format": "epoch_ms"
|
||||
}
|
||||
}
|
||||
'''
|
||||
setups['server_metrics_datafeed'] = setups['server_metrics_job'] + '''
|
||||
- do:
|
||||
xpack.ml.put_datafeed:
|
||||
datafeed_id: "datafeed-total-requests"
|
||||
body: >
|
||||
{
|
||||
"job_id":"total-requests",
|
||||
"indexes":"server-metrics"
|
||||
}
|
||||
'''
|
||||
setups['server_metrics_openjob'] = setups['server_metrics_datafeed'] + '''
|
||||
- do:
|
||||
xpack.ml.open_job:
|
||||
job_id: "total-requests"
|
||||
'''
|
||||
setups['server_metrics_startdf'] = setups['server_metrics_openjob'] + '''
|
||||
- do:
|
||||
xpack.ml.start_datafeed:
|
||||
datafeed_id: "datafeed-total-requests"
|
||||
'''
|
||||
setups['calendar_outages'] = '''
|
||||
- do:
|
||||
xpack.ml.put_calendar:
|
||||
calendar_id: "planned-outages"
|
||||
'''
|
||||
setups['calendar_outages_addevent'] = setups['calendar_outages'] + '''
|
||||
- do:
|
||||
xpack.ml.post_calendar_events:
|
||||
calendar_id: "planned-outages"
|
||||
body: >
|
||||
{ "description": "event 1", "start_time": "2017-12-01T00:00:00Z", "end_time": "2017-12-02T00:00:00Z", "calendar_id": "planned-outages" }
|
||||
|
||||
|
||||
'''
|
||||
setups['calendar_outages_openjob'] = setups['server_metrics_openjob'] + '''
|
||||
- do:
|
||||
xpack.ml.put_calendar:
|
||||
calendar_id: "planned-outages"
|
||||
'''
|
||||
setups['calendar_outages_addjob'] = setups['server_metrics_openjob'] + '''
|
||||
- do:
|
||||
xpack.ml.put_calendar:
|
||||
calendar_id: "planned-outages"
|
||||
body: >
|
||||
{
|
||||
"job_ids": ["total-requests"]
|
||||
}
|
||||
'''
|
||||
setups['calendar_outages_addevent'] = setups['calendar_outages_addjob'] + '''
|
||||
- do:
|
||||
xpack.ml.post_calendar_events:
|
||||
calendar_id: "planned-outages"
|
||||
body: >
|
||||
{ "events" : [
|
||||
{ "description": "event 1", "start_time": "1513641600000", "end_time": "1513728000000"},
|
||||
{ "description": "event 2", "start_time": "1513814400000", "end_time": "1513900800000"},
|
||||
{ "description": "event 3", "start_time": "1514160000000", "end_time": "1514246400000"}
|
||||
]}
|
||||
'''
|
||||
setups['role_mapping'] = '''
|
||||
- do:
|
||||
xpack.security.put_role_mapping:
|
||||
name: "mapping1"
|
||||
body: >
|
||||
{
|
||||
"enabled": true,
|
||||
"roles": [ "user" ],
|
||||
"rules": { "field": { "username": "*" } }
|
||||
}
|
||||
'''
|
||||
setups['sensor_rollup_job'] = '''
|
||||
- do:
|
||||
indices.create:
|
||||
index: sensor-1
|
||||
body:
|
||||
settings:
|
||||
number_of_shards: 1
|
||||
number_of_replicas: 0
|
||||
mappings:
|
||||
_doc:
|
||||
properties:
|
||||
timestamp:
|
||||
type: date
|
||||
temperature:
|
||||
type: long
|
||||
voltage:
|
||||
type: float
|
||||
node:
|
||||
type: keyword
|
||||
- do:
|
||||
xpack.rollup.put_job:
|
||||
id: "sensor"
|
||||
body: >
|
||||
{
|
||||
"index_pattern": "sensor-*",
|
||||
"rollup_index": "sensor_rollup",
|
||||
"cron": "*/30 * * * * ?",
|
||||
"page_size" :1000,
|
||||
"groups" : {
|
||||
"date_histogram": {
|
||||
"field": "timestamp",
|
||||
"interval": "1h",
|
||||
"delay": "7d"
|
||||
},
|
||||
"terms": {
|
||||
"fields": ["node"]
|
||||
}
|
||||
},
|
||||
"metrics": [
|
||||
{
|
||||
"field": "temperature",
|
||||
"metrics": ["min", "max", "sum"]
|
||||
},
|
||||
{
|
||||
"field": "voltage",
|
||||
"metrics": ["avg"]
|
||||
}
|
||||
]
|
||||
}
|
||||
'''
|
||||
setups['sensor_started_rollup_job'] = '''
|
||||
- do:
|
||||
indices.create:
|
||||
index: sensor-1
|
||||
body:
|
||||
settings:
|
||||
number_of_shards: 1
|
||||
number_of_replicas: 0
|
||||
mappings:
|
||||
_doc:
|
||||
properties:
|
||||
timestamp:
|
||||
type: date
|
||||
temperature:
|
||||
type: long
|
||||
voltage:
|
||||
type: float
|
||||
node:
|
||||
type: keyword
|
||||
|
||||
- do:
|
||||
bulk:
|
||||
index: sensor-1
|
||||
type: _doc
|
||||
refresh: true
|
||||
body: |
|
||||
{"index":{}}
|
||||
{"timestamp": 1516729294000, "temperature": 200, "voltage": 5.2, "node": "a"}
|
||||
{"index":{}}
|
||||
{"timestamp": 1516642894000, "temperature": 201, "voltage": 5.8, "node": "b"}
|
||||
{"index":{}}
|
||||
{"timestamp": 1516556494000, "temperature": 202, "voltage": 5.1, "node": "a"}
|
||||
{"index":{}}
|
||||
{"timestamp": 1516470094000, "temperature": 198, "voltage": 5.6, "node": "b"}
|
||||
{"index":{}}
|
||||
{"timestamp": 1516383694000, "temperature": 200, "voltage": 4.2, "node": "c"}
|
||||
{"index":{}}
|
||||
{"timestamp": 1516297294000, "temperature": 202, "voltage": 4.0, "node": "c"}
|
||||
|
||||
- do:
|
||||
xpack.rollup.put_job:
|
||||
id: "sensor"
|
||||
body: >
|
||||
{
|
||||
"index_pattern": "sensor-*",
|
||||
"rollup_index": "sensor_rollup",
|
||||
"cron": "* * * * * ?",
|
||||
"page_size" :1000,
|
||||
"groups" : {
|
||||
"date_histogram": {
|
||||
"field": "timestamp",
|
||||
"interval": "1h",
|
||||
"delay": "7d"
|
||||
},
|
||||
"terms": {
|
||||
"fields": ["node"]
|
||||
}
|
||||
},
|
||||
"metrics": [
|
||||
{
|
||||
"field": "temperature",
|
||||
"metrics": ["min", "max", "sum"]
|
||||
},
|
||||
{
|
||||
"field": "voltage",
|
||||
"metrics": ["avg"]
|
||||
}
|
||||
]
|
||||
}
|
||||
- do:
|
||||
xpack.rollup.start_job:
|
||||
id: "sensor"
|
||||
'''
|
||||
|
||||
setups['sensor_index'] = '''
|
||||
- do:
|
||||
indices.create:
|
||||
index: sensor-1
|
||||
body:
|
||||
settings:
|
||||
number_of_shards: 1
|
||||
number_of_replicas: 0
|
||||
mappings:
|
||||
_doc:
|
||||
properties:
|
||||
timestamp:
|
||||
type: date
|
||||
temperature:
|
||||
type: long
|
||||
voltage:
|
||||
type: float
|
||||
node:
|
||||
type: keyword
|
||||
load:
|
||||
type: double
|
||||
net_in:
|
||||
type: long
|
||||
net_out:
|
||||
type: long
|
||||
hostname:
|
||||
type: keyword
|
||||
datacenter:
|
||||
type: keyword
|
||||
'''
|
||||
|
||||
setups['sensor_prefab_data'] = '''
|
||||
- do:
|
||||
indices.create:
|
||||
index: sensor-1
|
||||
body:
|
||||
settings:
|
||||
number_of_shards: 1
|
||||
number_of_replicas: 0
|
||||
mappings:
|
||||
_doc:
|
||||
properties:
|
||||
timestamp:
|
||||
type: date
|
||||
temperature:
|
||||
type: long
|
||||
voltage:
|
||||
type: float
|
||||
node:
|
||||
type: keyword
|
||||
- do:
|
||||
indices.create:
|
||||
index: sensor_rollup
|
||||
body:
|
||||
settings:
|
||||
number_of_shards: 1
|
||||
number_of_replicas: 0
|
||||
mappings:
|
||||
_doc:
|
||||
properties:
|
||||
node.terms.value:
|
||||
type: keyword
|
||||
temperature.sum.value:
|
||||
type: double
|
||||
temperature.max.value:
|
||||
type: double
|
||||
temperature.min.value:
|
||||
type: double
|
||||
timestamp.date_histogram.time_zone:
|
||||
type: keyword
|
||||
timestamp.date_histogram.interval:
|
||||
type: keyword
|
||||
timestamp.date_histogram.timestamp:
|
||||
type: date
|
||||
timestamp.date_histogram._count:
|
||||
type: long
|
||||
voltage.avg.value:
|
||||
type: double
|
||||
voltage.avg._count:
|
||||
type: long
|
||||
_rollup.id:
|
||||
type: keyword
|
||||
_rollup.version:
|
||||
type: long
|
||||
_meta:
|
||||
_rollup:
|
||||
sensor:
|
||||
cron: "* * * * * ?"
|
||||
rollup_index: "sensor_rollup"
|
||||
index_pattern: "sensor-*"
|
||||
timeout: "20s"
|
||||
page_size: 1000
|
||||
groups:
|
||||
date_histogram:
|
||||
delay: "7d"
|
||||
field: "timestamp"
|
||||
interval: "1h"
|
||||
time_zone: "UTC"
|
||||
terms:
|
||||
fields:
|
||||
- "node"
|
||||
id: sensor
|
||||
metrics:
|
||||
- field: "temperature"
|
||||
metrics:
|
||||
- min
|
||||
- max
|
||||
- sum
|
||||
- field: "voltage"
|
||||
metrics:
|
||||
- avg
|
||||
|
||||
- do:
|
||||
bulk:
|
||||
index: sensor_rollup
|
||||
type: _doc
|
||||
refresh: true
|
||||
body: |
|
||||
{"index":{}}
|
||||
{"node.terms.value":"b","temperature.sum.value":201.0,"temperature.max.value":201.0,"timestamp.date_histogram.time_zone":"UTC","temperature.min.value":201.0,"timestamp.date_histogram._count":1,"timestamp.date_histogram.interval":"1h","_rollup.computed":["temperature.sum","temperature.min","voltage.avg","temperature.max","node.terms","timestamp.date_histogram"],"voltage.avg.value":5.800000190734863,"node.terms._count":1,"_rollup.version":1,"timestamp.date_histogram.timestamp":1516640400000,"voltage.avg._count":1.0,"_rollup.id":"sensor"}
|
||||
{"index":{}}
|
||||
{"node.terms.value":"c","temperature.sum.value":200.0,"temperature.max.value":200.0,"timestamp.date_histogram.time_zone":"UTC","temperature.min.value":200.0,"timestamp.date_histogram._count":1,"timestamp.date_histogram.interval":"1h","_rollup.computed":["temperature.sum","temperature.min","voltage.avg","temperature.max","node.terms","timestamp.date_histogram"],"voltage.avg.value":4.199999809265137,"node.terms._count":1,"_rollup.version":1,"timestamp.date_histogram.timestamp":1516381200000,"voltage.avg._count":1.0,"_rollup.id":"sensor"}
|
||||
{"index":{}}
|
||||
{"node.terms.value":"a","temperature.sum.value":202.0,"temperature.max.value":202.0,"timestamp.date_histogram.time_zone":"UTC","temperature.min.value":202.0,"timestamp.date_histogram._count":1,"timestamp.date_histogram.interval":"1h","_rollup.computed":["temperature.sum","temperature.min","voltage.avg","temperature.max","node.terms","timestamp.date_histogram"],"voltage.avg.value":5.099999904632568,"node.terms._count":1,"_rollup.version":1,"timestamp.date_histogram.timestamp":1516554000000,"voltage.avg._count":1.0,"_rollup.id":"sensor"}
|
||||
{"index":{}}
|
||||
{"node.terms.value":"a","temperature.sum.value":200.0,"temperature.max.value":200.0,"timestamp.date_histogram.time_zone":"UTC","temperature.min.value":200.0,"timestamp.date_histogram._count":1,"timestamp.date_histogram.interval":"1h","_rollup.computed":["temperature.sum","temperature.min","voltage.avg","temperature.max","node.terms","timestamp.date_histogram"],"voltage.avg.value":5.199999809265137,"node.terms._count":1,"_rollup.version":1,"timestamp.date_histogram.timestamp":1516726800000,"voltage.avg._count":1.0,"_rollup.id":"sensor"}
|
||||
{"index":{}}
|
||||
{"node.terms.value":"b","temperature.sum.value":198.0,"temperature.max.value":198.0,"timestamp.date_histogram.time_zone":"UTC","temperature.min.value":198.0,"timestamp.date_histogram._count":1,"timestamp.date_histogram.interval":"1h","_rollup.computed":["temperature.sum","temperature.min","voltage.avg","temperature.max","node.terms","timestamp.date_histogram"],"voltage.avg.value":5.599999904632568,"node.terms._count":1,"_rollup.version":1,"timestamp.date_histogram.timestamp":1516467600000,"voltage.avg._count":1.0,"_rollup.id":"sensor"}
|
||||
{"index":{}}
|
||||
{"node.terms.value":"c","temperature.sum.value":202.0,"temperature.max.value":202.0,"timestamp.date_histogram.time_zone":"UTC","temperature.min.value":202.0,"timestamp.date_histogram._count":1,"timestamp.date_histogram.interval":"1h","_rollup.computed":["temperature.sum","temperature.min","voltage.avg","temperature.max","node.terms","timestamp.date_histogram"],"voltage.avg.value":4.0,"node.terms._count":1,"_rollup.version":1,"timestamp.date_histogram.timestamp":1516294800000,"voltage.avg._count":1.0,"_rollup.id":"sensor"}
|
||||
|
||||
'''
|
|
@ -0,0 +1,157 @@
|
|||
[role="xpack"]
|
||||
[[certgen]]
|
||||
== certgen
|
||||
|
||||
deprecated[6.1,Replaced by <<certutil,`certutil`>>.]
|
||||
|
||||
The `certgen` command simplifies the creation of certificate authorities (CA),
|
||||
certificate signing requests (CSR), and signed certificates for use with the
|
||||
Elastic Stack. Though this command is deprecated, you do not need to replace CAs,
|
||||
CSRs, or certificates that it created.
|
||||
|
||||
[float]
|
||||
=== Synopsis
|
||||
|
||||
[source,shell]
|
||||
--------------------------------------------------
|
||||
bin/x-pack/certgen
|
||||
(([--cert <cert_file>] [--days <n>] [--dn <name>] [--key <key_file>]
|
||||
[--keysize <bits>] [--pass <password>] [--p12 <password>])
|
||||
| [--csr])
|
||||
[-E <KeyValuePair>] [-h, --help] [--in <input_file>] [--out <output_file>]
|
||||
([-s, --silent] | [-v, --verbose])
|
||||
--------------------------------------------------
|
||||
|
||||
[float]
|
||||
=== Description
|
||||
|
||||
By default, the command runs in interactive mode and you are prompted for
|
||||
information about each instance. An instance is any piece of the Elastic Stack
|
||||
that requires a Transport Layer Security (TLS) or SSL certificate. Depending on
|
||||
your configuration, {es}, Logstash, {kib}, and Beats might all require a
|
||||
certificate and private key.
|
||||
|
||||
The minimum required value for each instance is a name. This can simply be the
|
||||
hostname, which is used as the Common Name of the certificate. You can also use
|
||||
a full distinguished name. IP addresses and DNS names are optional. Multiple
|
||||
values can be specified as a comma-separated string. If no IP addresses or DNS
|
||||
names are provided, you might need to disable hostname verification in your TLS or SSL
|
||||
configuration.
|
||||
|
||||
Depending on the parameters that you specify, you are also prompted for
|
||||
necessary information such as the path for the output file and the CA private
|
||||
key password.
|
||||
|
||||
The `certgen` command also supports a silent mode of operation to enable easier
|
||||
batch operations. For more information, see <<certgen-silent>>.
|
||||
|
||||
The output file is a zip file that contains the signed certificates and private
|
||||
keys for each instance. If you chose to generate a CA, which is the default
|
||||
behavior, the certificate and private key are included in the output file. If
|
||||
you chose to generate CSRs, you should provide them to your commercial or
|
||||
organization-specific certificate authority to obtain signed certificates. The
|
||||
signed certificates must be in PEM format to work with {security}.
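
For example, the following invocation signs new instance certificates with an
existing CA; the file names shown here are only placeholders:

[source, sh]
--------------------------------------------------
bin/x-pack/certgen --cert ca.crt --key ca.key --out certificate-bundle.zip
--------------------------------------------------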
|
||||
|
||||
[float]
|
||||
=== Parameters
|
||||
|
||||
`--cert <cert_file>`:: Specifies to generate new instance certificates and keys
|
||||
using an existing CA certificate, which is provided in the `<cert_file>` argument.
|
||||
This parameter cannot be used with the `--csr` parameter.
|
||||
|
||||
`--csr`:: Specifies to operate in certificate signing request mode.
|
||||
|
||||
`--days <n>`::
|
||||
Specifies an integer value that represents the number of days the generated keys
|
||||
are valid. The default value is `1095`. This parameter cannot be used with the
|
||||
`--csr` parameter.
|
||||
|
||||
`--dn <name>`::
|
||||
Defines the _Distinguished Name_ that is used for the generated CA certificate.
|
||||
The default value is `CN=Elastic Certificate Tool Autogenerated CA`.
|
||||
This parameter cannot be used with the `--csr` parameter.
|
||||
|
||||
`-E <KeyValuePair>`:: Configures a setting.
|
||||
|
||||
`-h, --help`:: Returns all of the command parameters.
|
||||
|
||||
`--in <input_file>`:: Specifies the file that is used to run in silent mode. The
|
||||
input file must be a YAML file, as described in <<certgen-silent>>.
|
||||
|
||||
`--key <key_file>`:: Specifies the _private-key_ file for the CA certificate.
|
||||
This parameter is required whenever the `--cert` parameter is used.
|
||||
|
||||
`--keysize <bits>`::
|
||||
Defines the number of bits that are used in generated RSA keys. The default
|
||||
value is `2048`.
|
||||
|
||||
`--out <output_file>`:: Specifies a path for the output file.
|
||||
|
||||
`--pass <password>`:: Specifies the password for the CA private key.
|
||||
If the `--key` parameter is provided, then this is the password for the existing
|
||||
private key file. Otherwise, it is the password that should be applied to the
|
||||
generated CA key. This parameter cannot be used with the `--csr` parameter.
|
||||
|
||||
`--p12 <password>`::
|
||||
Generate a PKCS#12 (`.p12` or `.pfx`) container file for each of the instance
|
||||
certificates and keys. The generated file is protected by the supplied password,
|
||||
which can be blank. This parameter cannot be used with the `--csr` parameter.
|
||||
|
||||
`-s, --silent`:: Shows minimal output.
|
||||
|
||||
`-v, --verbose`:: Shows verbose output.
|
||||
|
||||
[float]
|
||||
=== Examples
|
||||
|
||||
[float]
|
||||
[[certgen-silent]]
|
||||
==== Using `certgen` in Silent Mode
|
||||
|
||||
To use the silent mode of operation, you must create a YAML file that contains
|
||||
information about the instances. It must match the following format:
|
||||
|
||||
[source, yaml]
|
||||
--------------------------------------------------
|
||||
instances:
|
||||
- name: "node1" <1>
|
||||
ip: <2>
|
||||
- "192.0.2.1"
|
||||
dns: <3>
|
||||
- "node1.mydomain.com"
|
||||
- name: "node2"
|
||||
ip:
|
||||
- "192.0.2.2"
|
||||
- "198.51.100.1"
|
||||
- name: "node3"
|
||||
- name: "node4"
|
||||
dns:
|
||||
- "node4.mydomain.com"
|
||||
- "node4.internal"
|
||||
- name: "CN=node5,OU=IT,DC=mydomain,DC=com"
|
||||
filename: "node5" <4>
|
||||
--------------------------------------------------
|
||||
<1> The name of the instance. This can be a simple string value or can be a
|
||||
Distinguished Name (DN). This is the only required field.
|
||||
<2> An optional array of strings that represent IP Addresses for this instance.
|
||||
Both IPv4 and IPv6 values are allowed. The values are added as Subject
|
||||
Alternative Names.
|
||||
<3> An optional array of strings that represent DNS names for this instance.
|
||||
The values are added as Subject Alternative Names.
|
||||
<4> The filename to use for this instance. This name is used as the name of the
|
||||
directory that contains the instance's files in the output. It is also used in
|
||||
the names of the files within the directory. This filename should not have an
|
||||
extension. Note: If the `name` provided for the instance does not represent a
|
||||
valid filename, then the `filename` field must be present.
|
||||
|
||||
When your YAML file is ready, you can use the `certgen` command to generate
|
||||
certificates or certificate signing requests. Simply use the `--in` parameter to
|
||||
specify the location of the file. For example:
|
||||
|
||||
[source, sh]
|
||||
--------------------------------------------------
|
||||
bin/x-pack/certgen --in instances.yml
|
||||
--------------------------------------------------
|
||||
|
||||
This command generates a CA certificate and private key as well as certificates
|
||||
and private keys for the instances that are listed in the YAML file.
|
|
@ -0,0 +1,289 @@
|
|||
[role="xpack"]
|
||||
[[certutil]]
|
||||
== certutil
|
||||
|
||||
The `certutil` command simplifies the creation of certificates for use with
|
||||
Transport Layer Security (TLS) in the Elastic Stack.
|
||||
|
||||
[float]
|
||||
=== Synopsis
|
||||
|
||||
[source,shell]
|
||||
--------------------------------------------------
|
||||
bin/x-pack/certutil
|
||||
(
|
||||
(ca [--ca-dn <name>] [--days <n>] [--pem])
|
||||
|
||||
| (cert ([--ca <file_path>] | [--ca-cert <file_path> --ca-key <file_path>])
|
||||
[--ca-dn <name>] [--ca-pass <password>] [--days <n>]
|
||||
[--dns <domain_name>] [--in <input_file>] [--ip <ip_addresses>]
|
||||
[--keep-ca-key] [--multiple] [--name <file_name>] [--pem])
|
||||
|
||||
| (csr [--dns <domain_name>] [--in <input_file>] [--ip <ip_addresses>]
|
||||
[--name <file_name>])
|
||||
|
||||
[-E <KeyValuePair>] [--keysize <bits>] [--out <file_path>]
|
||||
[--pass <password>]
|
||||
)
|
||||
[-h, --help] ([-s, --silent] | [-v, --verbose])
|
||||
--------------------------------------------------
|
||||
|
||||
[float]
|
||||
=== Description
|
||||
|
||||
You can specify one of the following modes: `ca`, `cert`, `csr`. The `certutil`
|
||||
command also supports a silent mode of operation to enable easier batch
|
||||
operations.
|
||||
|
||||
[float]
|
||||
[[certutil-ca]]
|
||||
==== CA mode
|
||||
|
||||
The `ca` mode generates a new certificate authority (CA). By default, it
|
||||
produces a single PKCS#12 output file, which holds the CA certificate and the
|
||||
private key for the CA. If you specify the `--pem` parameter, the command
|
||||
generates a zip file, which contains the certificate and private key in PEM
|
||||
format.
|
||||
|
||||
You can subsequently use these files as input for the `cert` mode of the command.
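
For example, the following command creates a CA in PEM format instead of the
default PKCS#12 container (the output file name is only an example):

[source, sh]
--------------------------------------------------
bin/x-pack/certutil ca --pem --out elastic-stack-ca.zip
--------------------------------------------------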
|
||||
|
||||
[float]
|
||||
[[certutil-cert]]
|
||||
==== CERT mode
|
||||
|
||||
The `cert` mode generates X.509 certificates and private keys. By default, it
|
||||
produces a single certificate and key for use on a single instance.
|
||||
|
||||
To generate certificates and keys for multiple instances, specify the
|
||||
`--multiple` parameter, which prompts you for details about each instance.
|
||||
Alternatively, you can use the `--in` parameter to specify a YAML file that
|
||||
contains details about the instances.
|
||||
|
||||
An instance is any piece of the Elastic Stack that requires a TLS or SSL
|
||||
certificate. Depending on your configuration, {es}, Logstash, {kib}, and Beats
|
||||
might all require a certificate and private key. The minimum required
|
||||
information for an instance is its name, which is used as the common name for
|
||||
the certificate. The instance name can be a hostname value or a full
|
||||
distinguished name. If the instance name would result in an invalid file or
|
||||
directory name, you must also specify a file name in the `--name` command
|
||||
parameter or in the `filename` field in an input YAML file.
|
||||
|
||||
You can optionally provide IP addresses or DNS names for each instance. If
|
||||
neither IP addresses nor DNS names are specified, the Elastic Stack products
|
||||
cannot perform hostname verification and you might need to configure the
|
||||
`verification_mode` security setting to `certificate` only. For more information
|
||||
about this setting, see <<security-settings>>.
|
||||
|
||||
All certificates that are generated by this command are signed by a CA. You can
|
||||
provide your own CA with the `--ca` or `--ca-cert` parameters. Otherwise, the
|
||||
command automatically generates a new CA for you. For more information about
|
||||
generating a CA, see the <<certutil-ca,CA mode of this command>>.
|
||||
|
||||
By default, the `cert` mode produces a single PKCS#12 output file which holds
|
||||
the instance certificate, the instance private key, and the CA certificate. If
|
||||
you specify the `--pem` parameter, the command generates PEM formatted
|
||||
certificates and keys and packages them into a zip file.
|
||||
If you specify the `--keep-ca-key`, `--multiple` or `--in` parameters,
|
||||
the command produces a zip file containing the generated certificates and keys.
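
For example, a command along the following lines prompts for details about each
instance and packages all of the generated certificates and keys into one zip
file (the CA and output file names are only examples):

[source, sh]
--------------------------------------------------
bin/x-pack/certutil cert --multiple --ca elastic-stack-ca.p12 --out certificate-bundle.zip
--------------------------------------------------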
|
||||
|
||||
[float]
|
||||
[[certutil-csr]]
|
||||
==== CSR mode
|
||||
|
||||
The `csr` mode generates certificate signing requests (CSRs) that you can send
|
||||
to a trusted certificate authority to obtain signed certificates. The signed
|
||||
certificates must be in PEM or PKCS#12 format to work with {security}.
|
||||
|
||||
By default, the command produces a single CSR for a single instance.
|
||||
|
||||
To generate CSRs for multiple instances, specify the `--multiple` parameter,
|
||||
which prompts you for details about each instance. Alternatively, you can use
|
||||
the `--in` parameter to specify a YAML file that contains details about the
|
||||
instances.
|
||||
|
||||
The `csr` mode produces a single zip file which contains the CSRs and the
|
||||
private keys for each instance. Each CSR is provided as a standard PEM
|
||||
encoding of a PKCS#10 CSR. Each key is provided as a PEM encoding of an RSA
|
||||
private key.
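
For example, the following command requests a CSR for a single instance; the
instance name, DNS name, IP address, and output file name are only examples:

[source, sh]
--------------------------------------------------
bin/x-pack/certutil csr --name node1 --dns node1.mydomain.com --ip 192.0.2.1 --out csr-bundle.zip
--------------------------------------------------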
|
||||
|
||||
[float]
|
||||
=== Parameters
|
||||
|
||||
`ca`:: Specifies to generate a new local certificate authority (CA). This
|
||||
parameter cannot be used with the `csr` or `cert` parameters.
|
||||
|
||||
`cert`:: Specifies to generate new X.509 certificates and keys.
|
||||
This parameter cannot be used with the `csr` or `ca` parameters.
|
||||
|
||||
`csr`:: Specifies to generate certificate signing requests. This parameter
|
||||
cannot be used with the `ca` or `cert` parameters.
|
||||
|
||||
`--ca <file_path>`:: Specifies the path to an existing CA key pair
|
||||
(in PKCS#12 format). This parameter cannot be used with the `ca` or `csr` parameters.
|
||||
|
||||
`--ca-cert <file_path>`:: Specifies the path to an existing CA certificate (in
|
||||
PEM format). You must also specify the `--ca-key` parameter. The `--ca-cert`
|
||||
parameter cannot be used with the `ca` or `csr` parameters.
|
||||
|
||||
`--ca-dn <name>`:: Defines the _Distinguished Name_ (DN) that is used for the
|
||||
generated CA certificate. The default value is
|
||||
`CN=Elastic Certificate Tool Autogenerated CA`. This parameter cannot be used
|
||||
with the `csr` parameter.
|
||||
|
||||
`--ca-key <file_path>`:: Specifies the path to an existing CA private key (in
|
||||
PEM format). You must also specify the `--ca-cert` parameter. The `--ca-key`
|
||||
parameter cannot be used with the `ca` or `csr` parameters.
|
||||
|
||||
`--ca-pass <password>`:: Specifies the password for an existing CA private key
|
||||
or the generated CA private key. This parameter cannot be used with the `ca` or
|
||||
`csr` parameters.
|
||||
|
||||
`--days <n>`:: Specifies an integer value that represents the number of days the
|
||||
generated certificates are valid. The default value is `1095`. This parameter
|
||||
cannot be used with the `csr` parameter.
|
||||
|
||||
`--dns <domain_name>`:: Specifies a comma-separated list of DNS names. This
|
||||
parameter cannot be used with the `ca` parameter.
|
||||
|
||||
`-E <KeyValuePair>`:: Configures a setting.
|
||||
|
||||
`-h, --help`:: Returns all of the command parameters.
|
||||
|
||||
`--in <input_file>`:: Specifies the file that is used to run in silent mode. The
|
||||
input file must be a YAML file. This parameter cannot be used with the `ca`
|
||||
parameter.
|
||||
|
||||
`--ip <IP_addresses>`:: Specifies a comma-separated list of IP addresses. This
|
||||
parameter cannot be used with the `ca` parameter.
|
||||
|
||||
`--keep-ca-key`:: When running in `cert` mode with an automatically-generated
|
||||
CA, specifies to retain the CA private key for future use.
|
||||
|
||||
`--keysize <bits>`::
|
||||
Defines the number of bits that are used in generated RSA keys. The default
|
||||
value is `2048`.
|
||||
|
||||
`--multiple`::
|
||||
Specifies to generate files for multiple instances. This parameter cannot be
|
||||
used with the `ca` parameter.
|
||||
|
||||
`--name <file_name>`::
|
||||
Specifies the name of the generated certificate. This parameter cannot be used
|
||||
with the `ca` parameter.
|
||||
|
||||
`--out <file_path>`:: Specifies a path for the output files.
|
||||
|
||||
`--pass <password>`:: Specifies the password for the generated private keys.
|
||||
+
|
||||
Keys stored in PKCS#12 format are always password protected.
|
||||
+
|
||||
Keys stored in PEM format are password protected only if the
|
||||
`--pass` parameter is specified. If you do not supply an argument for the
|
||||
`--pass` parameter, you are prompted for a password.
|
||||
+
|
||||
If you want to specify a _blank_ password (without prompting), use
|
||||
`--pass ""` (with no `=`).
|
||||
|
||||
`--pem`:: Generates certificates and keys in PEM format instead of PKCS#12. This
|
||||
parameter cannot be used with the `csr` parameter.
|
||||
|
||||
`-s, --silent`:: Shows minimal output.
|
||||
|
||||
`-v, --verbose`:: Shows verbose output.
|
||||
|
||||
[float]
|
||||
=== Examples
|
||||
|
||||
The following command generates a CA certificate and private key in PKCS#12
|
||||
format:
|
||||
|
||||
[source, sh]
|
||||
--------------------------------------------------
|
||||
bin/x-pack/certutil ca
|
||||
--------------------------------------------------
|
||||
|
||||
You are prompted for an output filename and a password. Alternatively, you can
|
||||
specify the `--out` and `--pass` parameters.
|
||||
|
||||
You can then generate X.509 certificates and private keys by using the new
|
||||
CA. For example:
|
||||
|
||||
[source, sh]
|
||||
--------------------------------------------------
|
||||
bin/x-pack/certutil cert --ca elastic-stack-ca.p12
|
||||
--------------------------------------------------
|
||||
|
||||
You are prompted for the CA password and for an output filename and password.
|
||||
Alternatively, you can specify the `--ca-pass`, `--out`, and `--pass` parameters.
|
||||
|
||||
By default, this command generates a file called `elastic-certificates.p12`,
|
||||
which you can copy to the relevant configuration directory for each Elastic
|
||||
product that you want to configure. For more information, see
|
||||
{xpack-ref}/ssl-tls.html[Setting Up TLS on a Cluster].
|
||||
|
||||
[float]
|
||||
[[certutil-silent]]
|
||||
==== Using `certutil` in Silent Mode
|
||||
|
||||
To use the silent mode of operation, you must create a YAML file that contains
|
||||
information about the instances. It must match the following format:
|
||||
|
||||
[source, yaml]
|
||||
--------------------------------------------------
|
||||
instances:
|
||||
- name: "node1" <1>
|
||||
ip: <2>
|
||||
- "192.0.2.1"
|
||||
dns: <3>
|
||||
- "node1.mydomain.com"
|
||||
- name: "node2"
|
||||
ip:
|
||||
- "192.0.2.2"
|
||||
- "198.51.100.1"
|
||||
- name: "node3"
|
||||
- name: "node4"
|
||||
dns:
|
||||
- "node4.mydomain.com"
|
||||
- "node4.internal"
|
||||
- name: "CN=node5,OU=IT,DC=mydomain,DC=com"
|
||||
filename: "node5" <4>
|
||||
--------------------------------------------------
|
||||
<1> The name of the instance. This can be a simple string value or can be a
|
||||
Distinguished Name (DN). This is the only required field.
|
||||
<2> An optional array of strings that represent IP Addresses for this instance.
|
||||
Both IPv4 and IPv6 values are allowed. The values are added as Subject
|
||||
Alternative Names.
|
||||
<3> An optional array of strings that represent DNS names for this instance.
|
||||
The values are added as Subject Alternative Names.
|
||||
<4> The filename to use for this instance. This name is used as the name of the
|
||||
directory that contains the instance's files in the output. It is also used in
|
||||
the names of the files within the directory. This filename should not have an
|
||||
extension. Note: If the `name` provided for the instance does not represent a
|
||||
valid filename, then the `filename` field must be present.
|
||||
|
||||
When your YAML file is ready, you can use the `certutil` command to generate
|
||||
certificates or certificate signing requests. Simply use the `--in` parameter to
|
||||
specify the location of the file. For example:
|
||||
|
||||
[source, sh]
|
||||
--------------------------------------------------
|
||||
bin/x-pack/certutil cert --silent --in instances.yml --out test1.zip --pass testpassword
|
||||
--------------------------------------------------
|
||||
|
||||
This command generates a compressed `test1.zip` file. After you decompress the
|
||||
output file, there is a directory for each instance that was listed in the
|
||||
`instances.yml` file. Each instance directory contains a single PKCS#12 (`.p12`)
|
||||
file, which contains the instance certificate, instance private key, and CA
|
||||
certificate.
|
||||
|
||||
You can also use the YAML file to generate certificate signing requests. For
|
||||
example:
|
||||
|
||||
[source, sh]
|
||||
--------------------------------------------------
|
||||
bin/x-pack/certutil csr --silent --in instances.yml --out test2.zip --pass testpassword
|
||||
--------------------------------------------------
|
||||
|
||||
This command generates a compressed file, which contains a directory for each
|
||||
instance. Each instance directory contains a certificate signing request
|
||||
(`*.csr` file) and private key (`*.key` file).
|
|
@ -0,0 +1,26 @@
|
|||
[role="xpack"]
|
||||
[[xpack-commands]]
|
||||
= {xpack} Commands
|
||||
|
||||
[partintro]
|
||||
--
|
||||
|
||||
{xpack} includes commands that help you configure security:
|
||||
|
||||
* <<certgen>>
|
||||
* <<certutil>>
|
||||
* <<migrate-tool>>
|
||||
* <<saml-metadata>>
|
||||
* <<setup-passwords>>
|
||||
* <<syskeygen>>
|
||||
* <<users-command>>
|
||||
|
||||
--
|
||||
|
||||
include::certgen.asciidoc[]
|
||||
include::certutil.asciidoc[]
|
||||
include::migrate-tool.asciidoc[]
|
||||
include::saml-metadata.asciidoc[]
|
||||
include::setup-passwords.asciidoc[]
|
||||
include::syskeygen.asciidoc[]
|
||||
include::users-command.asciidoc[]
|
|
@ -0,0 +1,109 @@
|
|||
[role="xpack"]
|
||||
[[migrate-tool]]
|
||||
== migrate
|
||||
|
||||
The `migrate` command migrates existing file-based users and roles to the native
|
||||
realm. From 5.0 onward, you should use the `native` realm to manage roles and
|
||||
local users.
|
||||
|
||||
|
||||
[float]
|
||||
=== Synopsis
|
||||
|
||||
[source,shell]
|
||||
--------------------------------------------------
|
||||
bin/x-pack/migrate
|
||||
(native (-U, --url <url>)
|
||||
[-h, --help] [-E <KeyValuePair>]
|
||||
[-n, --users <uids>] [-r, --roles <roles>]
|
||||
[-u, --username <uid>] [-p, --password <password>]
|
||||
[-s, --silent] [-v, --verbose])
|
||||
--------------------------------------------------
|
||||
|
||||
[float]
|
||||
=== Description
|
||||
|
||||
NOTE: When migrating from Shield 2.x, the `migrate` tool should be run prior
|
||||
to upgrading to ensure that all roles can be migrated, as some may be in a deprecated
|
||||
format that {xpack} cannot read. The `migrate` tool is available in Shield
|
||||
2.4.0 and higher.
|
||||
|
||||
The `migrate` tool loads the existing file-based users and roles and calls the
|
||||
user and roles APIs to add them to the native realm. You can migrate all users
|
||||
and roles, or specify the ones you want to migrate. Users and roles that
|
||||
already exist in the `native` realm are not replaced or overridden. If
|
||||
the names you specify with the `--users` and `--roles` options don't
|
||||
exist in the `file` realm, they are skipped.
|
||||
|
||||
[float]
|
||||
[[migrate-tool-options]]
|
||||
=== Parameters
|
||||
The `native` subcommand supports the following options:
|
||||
|
||||
`-E <KeyValuePair>`::
|
||||
Configures a setting.
|
||||
|
||||
`-h, --help`::
|
||||
Returns all of the command parameters.
|
||||
|
||||
`-n`, `--users`::
|
||||
Comma-separated list of the users that you want to migrate. If this parameter is
|
||||
not specified, all users are migrated.
|
||||
|
||||
`-p`, `--password`::
|
||||
Password to use for authentication with {es}.
|
||||
//TBD: What is the default if this isn't specified?
|
||||
|
||||
`-r`, `--roles`::
|
||||
Comma-separated list of the roles that you want to migrate. If this parameter is
|
||||
not specified, all roles are migrated.
|
||||
|
||||
`-s, --silent`:: Shows minimal output.
|
||||
|
||||
`-U`, `--url`::
|
||||
Endpoint URL of the {es} cluster to which you want to migrate the
|
||||
file-based users and roles. This parameter is required.
|
||||
|
||||
`-u`, `--username`::
|
||||
Username to use for authentication with {es}.
|
||||
//TBD: What is the default if this isn't specified?
|
||||
|
||||
`-v, --verbose`:: Shows verbose output.
|
||||
|
||||
[float]
|
||||
=== Examples
|
||||
|
||||
Run the migrate tool when {xpack} is installed. For example:
|
||||
|
||||
[source, sh]
|
||||
----------------------------------------------------------------------
|
||||
$ bin/x-pack/migrate native -U http://localhost:9200 -u elastic
|
||||
-p x-pack-test-password -n lee,foo -r role1,role2,role3,role4,foo
|
||||
starting migration of users and roles...
|
||||
importing users from [/home/es/config/shield/users]...
|
||||
found existing users: [test_user, joe3, joe2]
|
||||
migrating user [lee]
|
||||
{"user":{"created":true}}
|
||||
no user [foo] found, skipping
|
||||
importing roles from [/home/es/config/shield/roles.yml]...
|
||||
found existing roles: [marvel_user, role_query_fields, admin_role, role3, admin,
|
||||
remote_marvel_agent, power_user, role_new_format_name_array, role_run_as,
|
||||
logstash, role_fields, role_run_as1, role_new_format, kibana4_server, user,
|
||||
transport_client, role1.ab, role_query]
|
||||
migrating role [role1]
|
||||
{"role":{"created":true}}
|
||||
migrating role [role2]
|
||||
{"role":{"created":true}}
|
||||
role [role3] already exists, skipping
|
||||
no role [foo] found, skipping
|
||||
users and roles imported.
|
||||
----------------------------------------------------------------------
|
||||
|
||||
Additionally, the `-E` flag can be used to specify other settings. For example,
|
||||
to specify a different configuration directory, the command would look like:
|
||||
|
||||
[source, sh]
|
||||
----------------------------------------------------------------------
|
||||
$ bin/x-pack/migrate native -U http://localhost:9200 -u elastic
|
||||
-p x-pack-test-password -E path.conf=/etc/elasticsearch
|
||||
----------------------------------------------------------------------
|
|
@ -0,0 +1,132 @@
|
|||
[role="xpack"]
|
||||
[[saml-metadata]]
|
||||
== saml-metadata
|
||||
|
||||
The `saml-metadata` command can be used to generate a SAML 2.0 Service Provider
|
||||
Metadata file.
|
||||
|
||||
[float]
|
||||
=== Synopsis
|
||||
|
||||
[source,shell]
|
||||
--------------------------------------------------
|
||||
bin/x-pack/saml-metadata
|
||||
[--realm <name>]
|
||||
[--out <file_path>] [--batch]
|
||||
[--attribute <name>] [--service-name <name>]
|
||||
[--locale <name>] [--contacts]
|
||||
([--organisation-name <name>] [--organisation-display-name <name>] [--organisation-url <url>])
|
||||
([--signing-bundle <file_path>] | [--signing-cert <file_path>][--signing-key <file_path>])
|
||||
[--signing-key-password <password>]
|
||||
[-E <KeyValuePair>]
|
||||
[-h, --help] ([-s, --silent] | [-v, --verbose])
|
||||
--------------------------------------------------
|
||||
|
||||
[float]
|
||||
=== Description
|
||||
|
||||
The SAML 2.0 specification provides a mechanism for Service Providers to
|
||||
describe their capabilities and configuration using a _metadata file_.
|
||||
|
||||
The `saml-metadata` command generates such a file, based on the configuration of
|
||||
a SAML realm in {es}.
|
||||
|
||||
Some SAML Identity Providers will allow you to automatically import a metadata
|
||||
file when you configure the Elastic Stack as a Service Provider.
|
||||
|
||||
You can optionally select to digitally sign the metadata file in order to
|
||||
ensure its integrity and authenticity before sharing it with the Identity Provider.
|
||||
The key used to sign the metadata file need not be the same as
|
||||
the keys already used in the SAML realm configuration for SAML message signing.
|
||||
|
||||
[float]
|
||||
=== Parameters
|
||||
|
||||
`--attribute <name>`:: Specifies a SAML attribute that should be
|
||||
included as a `<RequestedAttribute>` element in the metadata. Any attribute
|
||||
configured in the {es} realm is automatically included and does not need to be
|
||||
specified as a commandline option.
|
||||
|
||||
`--batch`:: Does not prompt for user input.
|
||||
|
||||
`--contacts`:: Specifies that the metadata should include one or more
|
||||
`<ContactPerson>` elements. You are prompted to enter the details for
|
||||
each person.
|
||||
|
||||
`-E <KeyValuePair>`:: Configures an {es} setting.
|
||||
|
||||
`-h, --help`:: Returns all of the command parameters.
|
||||
|
||||
`--locale <name>`:: Specifies the locale to use for metadata elements such as
|
||||
`<ServiceName>`. Defaults to the JVM's default system locale.
|
||||
|
||||
`--organisation-display-name <name>`:: Specifies the value of the
|
||||
`<OrganizationDisplayName>` element.
|
||||
Only valid if `--organisation-name` is also specified.
|
||||
|
||||
`--organisation-name <name>`:: Specifies that an `<Organization>` element should
|
||||
be included in the metadata and provides the value for the `<OrganizationName>`.
|
||||
If this is specified, then `--organisation-url` must also be specified.
|
||||
|
||||
`--organisation-url <url>`:: Specifies the value of the `<OrganizationURL>`
|
||||
element. This is required if `--organisation-name` is specified.
|
||||
|
||||
`--out <file_path>`:: Specifies a path for the output files.
|
||||
Defaults to `saml-elasticsearch-metadata.xml`.
|
||||
|
||||
`--service-name <name>`:: Specifies the value for the `<ServiceName>` element in
|
||||
the metadata. Defaults to `elasticsearch`.
|
||||
|
||||
`--signing-bundle <file_path>`:: Specifies the path to an existing key pair
|
||||
(in PKCS#12 format). The private key of that key pair will be used to sign
|
||||
the metadata file.
|
||||
|
||||
`--signing-cert <file_path>`:: Specifies the path to an existing certificate (in
|
||||
PEM format) to be used for signing the metadata file. You must also specify
|
||||
the `--signing-key` parameter. This parameter cannot be used with the
|
||||
`--signing-bundle` parameter.
|
||||
|
||||
`--signing-key <file_path>`:: Specifies the path to an existing key (in PEM format)
|
||||
to be used for signing the metadata file. You must also specify the
|
||||
`--signing-cert` parameter. This parameter cannot be used with the
|
||||
`--signing-bundle` parameter.
|
||||
|
||||
`--signing-key-password <password>`:: Specifies the password for the signing key.
|
||||
It can be used with either the `--signing-key` or the `--signing-bundle` parameters.
|
||||
|
||||
`--realm <name>`:: Specifies the name of the realm for which the metadata
|
||||
should be generated. This parameter is required if there is more than one `saml`
|
||||
realm in your {es} configuration.
|
||||
|
||||
`-s, --silent`:: Shows minimal output.
|
||||
|
||||
`-v, --verbose`:: Shows verbose output.
|
||||
|
||||
[float]
|
||||
=== Examples
|
||||
|
||||
The following command generates a default metadata file for the `saml1` realm:
|
||||
|
||||
[source, sh]
|
||||
--------------------------------------------------
|
||||
bin/x-pack/saml-metadata --realm saml1
|
||||
--------------------------------------------------
|
||||
|
||||
The file will be written to `saml-elasticsearch-metadata.xml`.
|
||||
You may be prompted to provide the "friendlyName" value for any attributes that
|
||||
are used by the realm.
|
||||
|
||||
The following command generates a metadata file for the `saml2` realm, with a
|
||||
`<ServiceName>` of `kibana-finance`, a locale of `en-GB` and includes
|
||||
`<ContactPerson>` elements and an `<Organization>` element:
|
||||
|
||||
[source, sh]
|
||||
--------------------------------------------------
|
||||
bin/x-pack/saml-metadata --realm saml2 \
|
||||
--service-name kibana-finance \
|
||||
--locale en-GB \
|
||||
--contacts \
|
||||
--organisation-name "Mega Corp. Finance Team" \
|
||||
--organisation-url "http://mega.example.com/finance/"
|
||||
--------------------------------------------------
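
The following sketch signs the generated metadata for the `saml1` realm with an
existing PEM certificate and key; the file names and password are placeholders:

[source, sh]
--------------------------------------------------
bin/x-pack/saml-metadata --realm saml1 \
  --signing-cert saml-signing.crt \
  --signing-key saml-signing.key \
  --signing-key-password changeme
--------------------------------------------------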
|
||||
|
|
@ -0,0 +1,72 @@
|
|||
[role="xpack"]
|
||||
[[setup-passwords]]
|
||||
== setup-passwords
|
||||
|
||||
The `setup-passwords` command sets the passwords for the built-in `elastic`,
|
||||
`kibana`, `logstash_system`, and `beats_system` users.
|
||||
|
||||
[float]
|
||||
=== Synopsis
|
||||
|
||||
[source,shell]
|
||||
--------------------------------------------------
|
||||
bin/x-pack/setup-passwords auto|interactive
|
||||
[-b, --batch] [-h, --help] [-E <KeyValuePair>]
|
||||
[-s, --silent] [-u, --url "<URL>"] [-v, --verbose]
|
||||
--------------------------------------------------
|
||||
|
||||
[float]
|
||||
=== Description
|
||||
|
||||
This command is intended for use only during the initial configuration of
|
||||
{xpack}. It uses the
|
||||
{xpack-ref}/setting-up-authentication.html#bootstrap-elastic-passwords[`elastic` bootstrap password]
|
||||
to run user management API requests. After you set a password for the `elastic`
|
||||
user, the bootstrap password is no longer active and you cannot use this command.
|
||||
Instead, you can change passwords by using the *Management > Users* UI in {kib}
|
||||
or the <<security-api-change-password,Change Password API>>.
|
||||
|
||||
This command uses an HTTP connection to connect to the cluster and run the user
|
||||
management requests. If your cluster uses TLS/SSL on the HTTP layer, the command
|
||||
automatically attempts to establish the connection by using the HTTPS protocol.
|
||||
It configures the connection by using the `xpack.security.http.ssl` settings in
|
||||
the `elasticsearch.yml` file. If you do not use the default config directory
|
||||
location, ensure that the *ES_PATH_CONF* environment variable specifies the
|
||||
correct path before you run the `setup-passwords` command. You can override
|
||||
settings in your `elasticsearch.yml` file by using the `-E` command option.
|
||||
For more information about debugging connection failures, see
|
||||
{xpack-ref}/trb-security-setup.html[Setup-passwords command fails due to connection failure].
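
For example, a minimal sketch of running the command when the configuration lives
outside the default directory; the path shown is an assumption:

[source,shell]
--------------------------------------------------
ES_PATH_CONF=/etc/elasticsearch bin/x-pack/setup-passwords auto
--------------------------------------------------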
|
||||
|
||||
[float]
|
||||
=== Parameters
|
||||
|
||||
`auto`:: Outputs randomly-generated passwords to the console.
|
||||
|
||||
`-b, --batch`:: If enabled, runs the change password process without prompting the
|
||||
user.
|
||||
|
||||
`-E <KeyValuePair>`:: Configures a standard {es} or {xpack} setting.
|
||||
|
||||
`-h, --help`:: Shows help information.
|
||||
|
||||
`interactive`:: Prompts you to manually enter passwords.
|
||||
|
||||
`-s, --silent`:: Shows minimal output.
|
||||
|
||||
`-u, --url "<URL>"`:: Specifies the URL that the tool uses to submit the user management API
|
||||
requests. The default value is determined from the settings in your
|
||||
`elasticsearch.yml` file. If `xpack.security.http.ssl.enabled` is set to `true`,
|
||||
you must specify an HTTPS URL.
|
||||
|
||||
`-v, --verbose`:: Shows verbose output.
|
||||
|
||||
[float]
|
||||
=== Examples
|
||||
|
||||
The following example uses the `-u` parameter to tell the tool where to submit
|
||||
its user management API requests:
|
||||
|
||||
[source,shell]
|
||||
--------------------------------------------------
|
||||
bin/x-pack/setup-passwords auto -u "http://localhost:9201"
|
||||
--------------------------------------------------
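
To enter the passwords manually instead, use the `interactive` mode. For example:

[source,shell]
--------------------------------------------------
bin/x-pack/setup-passwords interactive
--------------------------------------------------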
|
|
@ -0,0 +1,50 @@
|
|||
[role="xpack"]
|
||||
[[syskeygen]]
|
||||
== syskeygen
|
||||
|
||||
The `syskeygen` command creates a system key file in `CONFIG_DIR/x-pack`.
|
||||
|
||||
[float]
|
||||
=== Synopsis
|
||||
|
||||
[source,shell]
|
||||
--------------------------------------------------
|
||||
bin/x-pack/syskeygen
|
||||
[-E <KeyValuePair>] [-h, --help]
|
||||
([-s, --silent] | [-v, --verbose])
|
||||
--------------------------------------------------
|
||||
|
||||
[float]
|
||||
=== Description
|
||||
|
||||
The command generates a `system_key` file, which you can use to symmetrically
|
||||
encrypt sensitive data. For example, you can use this key to prevent {watcher}
|
||||
from returning and storing information that contains clear text credentials. See {xpack-ref}/encrypting-data.html[Encrypting sensitive data in {watcher}].
|
||||
|
||||
IMPORTANT: The system key is a symmetric key, so the same key must be used on
|
||||
every node in the cluster.
|
||||
|
||||
[float]
|
||||
=== Parameters
|
||||
|
||||
`-E <KeyValuePair>`:: Configures a setting. For example, if you have a custom
|
||||
installation of {es}, you can use this parameter to specify the `ES_PATH_CONF`
|
||||
environment variable.
|
||||
|
||||
`-h, --help`:: Returns all of the command parameters.
|
||||
|
||||
`-s, --silent`:: Shows minimal output.
|
||||
|
||||
`-v, --verbose`:: Shows verbose output.
|
||||
|
||||
|
||||
[float]
|
||||
=== Examples
|
||||
|
||||
The following command generates a `system_key` file in the
|
||||
default `$ES_HOME/config/x-pack` directory:
|
||||
|
||||
[source, sh]
|
||||
--------------------------------------------------
|
||||
bin/x-pack/syskeygen
|
||||
--------------------------------------------------
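
If your configuration is in a non-default location, a sketch along the lines of
the `migrate` example would pass the path with `-E`; the path is an assumption:

[source, sh]
--------------------------------------------------
bin/x-pack/syskeygen -E path.conf=/etc/elasticsearch
--------------------------------------------------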
|
|
@ -0,0 +1,138 @@
|
|||
[role="xpack"]
|
||||
[[users-command]]
|
||||
== Users Command
|
||||
++++
|
||||
<titleabbrev>users</titleabbrev>
|
||||
++++
|
||||
|
||||
If you use file-based user authentication, the `users` command enables you to
|
||||
add and remove users, assign user roles, and manage passwords.
|
||||
|
||||
[float]
|
||||
=== Synopsis
|
||||
|
||||
[source,shell]
|
||||
--------------------------------------------------
|
||||
bin/x-pack/users
|
||||
([useradd <username>] [-p <password>] [-r <roles>]) |
|
||||
([list] <username>) |
|
||||
([passwd <username>] [-p <password>]) |
|
||||
([roles <username>] [-a <roles>] [-r <roles>]) |
|
||||
([userdel <username>])
|
||||
--------------------------------------------------
|
||||
|
||||
[float]
|
||||
=== Description
|
||||
|
||||
If you use the built-in `file` internal realm, users are defined in local files
|
||||
on each node in the cluster.
|
||||
|
||||
Usernames and roles must be at least 1 character and no more than 1024 characters. They
|
||||
can contain alphanumeric characters (`a-z`, `A-Z`, `0-9`), spaces, punctuation,
|
||||
and printable symbols in the
|
||||
https://en.wikipedia.org/wiki/Basic_Latin_(Unicode_block)[Basic Latin (ASCII) block].
|
||||
Leading or trailing whitespace is not allowed.
|
||||
|
||||
Passwords must be at least 6 characters long.
|
||||
|
||||
For more information, see {xpack-ref}/file-realm.html[File-based User Authentication].
|
||||
|
||||
TIP: To ensure that {es} can read the user and role information at startup, run
|
||||
`users useradd` as the same user you use to run {es}. Running the command as
|
||||
root or some other user updates the permissions for the `users` and `users_roles`
|
||||
files and prevents {es} from accessing them.
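
For example, on an installation where {es} runs as the `elasticsearch` user (an
assumption about your setup), a minimal sketch would be:

[source,shell]
-------------------------------------------------------------------
sudo -u elasticsearch bin/x-pack/users useradd jacknich -p theshining -r network,monitoring
-------------------------------------------------------------------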
|
||||
|
||||
[float]
|
||||
=== Parameters
|
||||
|
||||
`-a <roles>`:: If used with the `roles` parameter, adds a comma-separated list
|
||||
of roles to a user.
|
||||
|
||||
//`-h, --help`:: Returns all of the command parameters.
|
||||
|
||||
`list`:: Lists the users that are registered with the `file` realm
|
||||
on the local node. If you also specify a user name, the command provides
|
||||
information for that user.
|
||||
|
||||
`-p <password>`:: Specifies the user's password. If you do not specify this
|
||||
parameter, the command prompts you for the password.
|
||||
+
|
||||
--
|
||||
TIP: Omit the `-p` option to keep
|
||||
plaintext passwords out of the terminal session's command history.
|
||||
|
||||
--
|
||||
|
||||
`passwd <username>`:: Resets a user's password. You can specify the new
|
||||
password directly with the `-p` parameter.
|
||||
|
||||
`-r <roles>`::
|
||||
* If used with the `useradd` parameter, defines a user's roles. This option
|
||||
accepts a comma-separated list of role names to assign to the user.
|
||||
* If used with the `roles` parameter, removes a comma-separated list of roles
|
||||
from a user.
|
||||
|
||||
`roles`:: Manages the roles of a particular user. You can combine adding and
|
||||
removing roles within the same command to change a user's roles.
|
||||
|
||||
//`-s, --silent`:: Shows minimal output.
|
||||
|
||||
`useradd <username>`:: Adds a user to your local node.
|
||||
|
||||
`userdel <username>`:: Deletes a user from your local node.
|
||||
|
||||
//`-v, --verbose`:: Shows verbose output.
|
||||
|
||||
//[float]
|
||||
//=== Authorization
|
||||
|
||||
[float]
|
||||
=== Examples
|
||||
|
||||
The following example adds a new user named `jacknich` to the `file` realm. The
|
||||
password for this user is `theshining`, and this user is associated with the
|
||||
`network` and `monitoring` roles.
|
||||
|
||||
[source,shell]
|
||||
-------------------------------------------------------------------
|
||||
bin/x-pack/users useradd jacknich -p theshining -r network,monitoring
|
||||
-------------------------------------------------------------------
|
||||
|
||||
The following example lists the users that are registered with the `file` realm
|
||||
on the local node:
|
||||
|
||||
[source, shell]
|
||||
----------------------------------
|
||||
bin/x-pack/users list
|
||||
rdeniro : admin
|
||||
alpacino : power_user
|
||||
jacknich : monitoring,network
|
||||
----------------------------------
|
||||
|
||||
Users are in the left-hand column and their corresponding roles are listed in
|
||||
the right-hand column.
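
Because `list` also accepts a username, a minimal sketch of showing a single user
is:

[source, shell]
----------------------------------
bin/x-pack/users list jacknich
----------------------------------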
|
||||
|
||||
The following example resets the `jacknich` user's password:
|
||||
|
||||
[source,shell]
|
||||
--------------------------------------------------
|
||||
bin/x-pack/users passwd jacknich
|
||||
--------------------------------------------------
|
||||
|
||||
Since the `-p` parameter was omitted, the command prompts you to enter and
|
||||
confirm a password in interactive mode.
|
||||
|
||||
The following example removes the `network` and `monitoring` roles from the
|
||||
`jacknich` user and adds the `user` role:
|
||||
|
||||
[source,shell]
|
||||
------------------------------------------------------------
|
||||
bin/x-pack/users roles jacknich -r network,monitoring -a user
|
||||
------------------------------------------------------------
|
||||
|
||||
The following example deletes the `jacknich` user:
|
||||
|
||||
[source,shell]
|
||||
--------------------------------------------------
|
||||
bin/x-pack/users userdel jacknich
|
||||
--------------------------------------------------
|
|
@ -0,0 +1,38 @@
|
|||
|
||||
include::{es-repo-dir}/index-shared1.asciidoc[]
|
||||
|
||||
:edit_url!:
|
||||
include::setup/setup-xes.asciidoc[]
|
||||
|
||||
:edit_url:
|
||||
include::{es-repo-dir}/index-shared2.asciidoc[]
|
||||
|
||||
:edit_url!:
|
||||
include::release-notes/xpack-breaking.asciidoc[]
|
||||
|
||||
:edit_url:
|
||||
include::{es-repo-dir}/index-shared3.asciidoc[]
|
||||
|
||||
:edit_url!:
|
||||
include::sql/index.asciidoc[]
|
||||
|
||||
:edit_url!:
|
||||
include::monitoring/index.asciidoc[]
|
||||
|
||||
:edit_url!:
|
||||
include::rollup/index.asciidoc[]
|
||||
|
||||
:edit_url!:
|
||||
include::rest-api/index.asciidoc[]
|
||||
|
||||
:edit_url!:
|
||||
include::commands/index.asciidoc[]
|
||||
|
||||
:edit_url:
|
||||
include::{es-repo-dir}/index-shared4.asciidoc[]
|
||||
|
||||
:edit_url!:
|
||||
include::release-notes/xpack-xes.asciidoc[]
|
||||
|
||||
:edit_url:
|
||||
include::{es-repo-dir}/index-shared5.asciidoc[]
|
|
@ -0,0 +1,183 @@
|
|||
[[ml-configuring-aggregation]]
|
||||
=== Aggregating Data For Faster Performance
|
||||
|
||||
By default, {dfeeds} fetch data from {es} using search and scroll requests.
|
||||
It can be significantly more efficient, however, to aggregate data in {es}
|
||||
and to configure your jobs to analyze aggregated data.
|
||||
|
||||
One of the benefits of aggregating data this way is that {es} automatically
|
||||
distributes these calculations across your cluster. You can then feed this
|
||||
aggregated data into {xpackml} instead of raw results, which
|
||||
reduces the volume of data that must be considered while detecting anomalies.
|
||||
|
||||
There are some limitations to using aggregations in {dfeeds}, however.
|
||||
Your aggregation must include a buckets aggregation, which in turn must contain
|
||||
a date histogram aggregation. This requirement ensures that the aggregated
|
||||
data is a time series. If you use a terms aggregation and the cardinality of a
|
||||
term is high, then the aggregation might not be effective and you might want
|
||||
to just use the default search and scroll behavior.
|
||||
|
||||
When you create or update a job, you can include the names of aggregations, for
|
||||
example:
|
||||
|
||||
[source,js]
|
||||
----------------------------------
|
||||
PUT _xpack/ml/anomaly_detectors/farequote
|
||||
{
|
||||
"analysis_config": {
|
||||
"bucket_span": "60m",
|
||||
"detectors": [{
|
||||
"function":"mean",
|
||||
"field_name":"responsetime",
|
||||
"by_field_name":"airline"
|
||||
}],
|
||||
"summary_count_field_name": "doc_count"
|
||||
},
|
||||
"data_description": {
|
||||
"time_field":"time"
|
||||
}
|
||||
}
|
||||
----------------------------------
|
||||
|
||||
In this example, the `airline`, `responsetime`, and `time` fields are
|
||||
aggregations.
|
||||
|
||||
NOTE: When the `summary_count_field_name` property is set to a non-null value,
|
||||
the job expects to receive aggregated input. The property must be set to the
|
||||
name of the field that contains the count of raw data points that have been
|
||||
aggregated. It applies to all detectors in the job.
|
||||
|
||||
The aggregations are defined in the {dfeed} as follows:
|
||||
|
||||
[source,js]
|
||||
----------------------------------
|
||||
PUT _xpack/ml/datafeeds/datafeed-farequote
|
||||
{
|
||||
"job_id":"farequote",
|
||||
"indices": ["farequote"],
|
||||
"types": ["response"],
|
||||
"aggregations": {
|
||||
"buckets": {
|
||||
"date_histogram": {
|
||||
"field": "time",
|
||||
"interval": "360s",
|
||||
"time_zone": "UTC"
|
||||
},
|
||||
"aggregations": {
|
||||
"time": {
|
||||
"max": {"field": "time"}
|
||||
},
|
||||
"airline": {
|
||||
"terms": {
|
||||
"field": "airline",
|
||||
"size": 100
|
||||
},
|
||||
"aggregations": {
|
||||
"responsetime": {
|
||||
"avg": {
|
||||
"field": "responsetime"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
----------------------------------
|
||||
|
||||
|
||||
In this example, the aggregations have names that match the fields that they
|
||||
operate on. That is to say, the `max` aggregation is named `time` and its
|
||||
field is also `time`. The same is true for the aggregations with the names
|
||||
`airline` and `responsetime`. Since you must create the job before you can
|
||||
create the {dfeed}, synchronizing your aggregation and field names can simplify
|
||||
these configuration steps.
|
||||
|
||||
IMPORTANT: If you use a `max` aggregation on a time field, the aggregation name
|
||||
in the {dfeed} must match the name of the time field, as in the previous example.
|
||||
For all other aggregations, if the aggregation name doesn't match the field name,
|
||||
there are limitations in the drill-down functionality within the {ml} page in
|
||||
{kib}.
|
||||
|
||||
When you define an aggregation in a {dfeed}, it must have the following form:
|
||||
|
||||
[source,js]
|
||||
----------------------------------
|
||||
"aggregations" : {
|
||||
"buckets" : {
|
||||
"date_histogram" : {
|
||||
"time_zone": "UTC", ...
|
||||
},
|
||||
"aggregations": {
|
||||
"<time_field>": {
|
||||
"max": {
|
||||
"field":"<time_field>"
|
||||
}
|
||||
}
|
||||
[,"<first_term>": {
|
||||
"terms":{...
|
||||
}
|
||||
[,"aggregations" : {
|
||||
[<sub_aggregation>]+
|
||||
} ]
|
||||
}]
|
||||
}
|
||||
}
|
||||
}
|
||||
----------------------------------
|
||||
|
||||
You must specify `buckets` as the aggregation name and `date_histogram` as the
|
||||
aggregation type. For more information, see
|
||||
{ref}/search-aggregations-bucket-datehistogram-aggregation.html[Date Histogram Aggregation].
|
||||
|
||||
NOTE: The `time_zone` parameter in the date histogram aggregation must be set to `UTC`,
|
||||
which is the default value.
|
||||
|
||||
Each histogram bucket has a key, which is the bucket start time. This key cannot
|
||||
be used for aggregations in {dfeeds}, however, because they need to know the
|
||||
time of the latest record within a bucket. Otherwise, when you restart a {dfeed},
|
||||
it continues from the start time of the histogram bucket and possibly fetches
|
||||
the same data twice. The max aggregation for the time field is therefore
|
||||
necessary to provide the time of the latest record within a bucket.
|
||||
|
||||
You can optionally specify a terms aggregation, which creates buckets for
|
||||
different values of a field.
|
||||
|
||||
IMPORTANT: If you use a terms aggregation, by default it returns buckets for
|
||||
the top ten terms. Thus if the cardinality of the term is greater than 10, not
|
||||
all terms are analyzed.
|
||||
|
||||
You can change this behavior by setting the `size` parameter. To
|
||||
determine the cardinality of your data, you can run searches such as:
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
GET .../_search {
|
||||
"aggs": {
|
||||
"service_cardinality": {
|
||||
"cardinality": {
|
||||
"field": "service"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
--------------------------------------------------
|
||||
|
||||
By default, {es} limits the maximum number of terms returned to 10000. For high
|
||||
cardinality fields, the query might not run; it might return circuit-breaking
|
||||
errors that indicate that the data is too large. In such
|
||||
cases, do not use aggregations in your {dfeed}. For more
|
||||
information, see {ref}/search-aggregations-bucket-terms-aggregation.html[Terms Aggregation].
|
||||
|
||||
You can also optionally specify multiple sub-aggregations.
|
||||
The sub-aggregations are aggregated for the buckets that were created by their
|
||||
parent aggregation. For more information, see
|
||||
{ref}/search-aggregations.html[Aggregations].
|
||||
|
||||
TIP: If your detectors use metric or sum analytical functions, set the
|
||||
`interval` of the date histogram aggregation to a tenth of the `bucket_span`
|
||||
that was defined in the job. For example, the `farequote` job shown earlier uses
a `bucket_span` of `60m`, so its {dfeed} uses an `interval` of `360s`. This
suggestion creates finer, more granular time
|
||||
buckets, which are ideal for this type of analysis. If your detectors use count
|
||||
or rare functions, set `interval` to the same value as `bucket_span`. For more
|
||||
information about analytical functions, see <<ml-functions>>.
|
|
@ -0,0 +1,29 @@
|
|||
[float]
|
||||
[[ml-analyzing]]
|
||||
=== Analyzing the Past and Present
|
||||
|
||||
The {xpackml} features automate the analysis of time-series data by creating
|
||||
accurate baselines of normal behavior in the data and identifying anomalous
|
||||
patterns in that data. You can submit your data for analysis in batches or
|
||||
continuously in real-time {dfeeds}.
|
||||
|
||||
Proprietary {ml} algorithms detect the following circumstances, score them, and
|
||||
link them with statistically significant influencers in the data:
|
||||
|
||||
* Anomalies related to temporal deviations in values, counts, or frequencies
|
||||
* Statistical rarity
|
||||
* Unusual behaviors for a member of a population
|
||||
|
||||
Automated periodicity detection and quick adaptation to changing data ensure
|
||||
that you don’t need to specify algorithms, models, or other data science-related
|
||||
configurations in order to get the benefits of {ml}.
|
||||
|
||||
You can view the {ml} results in {kib} where, for example, charts illustrate the
|
||||
actual data values, the bounds for the expected values, and the anomalies that
|
||||
occur outside these bounds.
|
||||
|
||||
[role="screenshot"]
|
||||
image::images/ml-gs-job-analysis.jpg["Example screenshot from the Machine Learning Single Metric Viewer in Kibana"]
|
||||
|
||||
For a more detailed walk-through of {xpackml} features, see
|
||||
<<ml-getting-started>>.
|
|
@ -0,0 +1,91 @@
|
|||
[[ml-api-quickref]]
|
||||
== API Quick Reference
|
||||
|
||||
All {ml} endpoints have the following base:
|
||||
|
||||
[source,js]
|
||||
----
|
||||
/_xpack/ml/
|
||||
----
|
||||
|
||||
The main {ml} resources can be accessed with a variety of endpoints:
|
||||
|
||||
* <<ml-api-jobs,+/anomaly_detectors/+>>: Create and manage {ml} jobs
|
||||
* <<ml-api-datafeeds,+/datafeeds/+>>: Select data from {es} to be analyzed
|
||||
* <<ml-api-results,+/results/+>>: Access the results of a {ml} job
|
||||
* <<ml-api-snapshots,+/model_snapshots/+>>: Manage model snapshots
|
||||
//* <<ml-api-validate,+/validate/+>>: Validate subsections of job configurations
|
||||
|
||||
[float]
|
||||
[[ml-api-jobs]]
|
||||
=== /anomaly_detectors/
|
||||
|
||||
* {ref}/ml-put-job.html[PUT /anomaly_detectors/<job_id+++>+++]: Create a job
|
||||
* {ref}/ml-open-job.html[POST /anomaly_detectors/<job_id>/_open]: Open a job
|
||||
* {ref}/ml-post-data.html[POST /anomaly_detectors/<job_id>/_data]: Send data to a job
|
||||
* {ref}/ml-get-job.html[GET /anomaly_detectors]: List jobs
|
||||
* {ref}/ml-get-job.html[GET /anomaly_detectors/<job_id+++>+++]: Get job details
|
||||
* {ref}/ml-get-job-stats.html[GET /anomaly_detectors/<job_id>/_stats]: Get job statistics
|
||||
* {ref}/ml-update-job.html[POST /anomaly_detectors/<job_id>/_update]: Update certain properties of the job configuration
|
||||
* {ref}/ml-flush-job.html[POST anomaly_detectors/<job_id>/_flush]: Force a job to analyze buffered data
|
||||
* {ref}/ml-forecast.html[POST anomaly_detectors/<job_id>/_forecast]: Forecast future job behavior
|
||||
* {ref}/ml-close-job.html[POST /anomaly_detectors/<job_id>/_close]: Close a job
|
||||
* {ref}/ml-delete-job.html[DELETE /anomaly_detectors/<job_id+++>+++]: Delete a job
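
For example, a minimal sketch of listing jobs over the REST API with `curl`; the
host, port, and credentials are assumptions:

[source,sh]
--------------------------------------------------
curl -u elastic -X GET "http://localhost:9200/_xpack/ml/anomaly_detectors"
--------------------------------------------------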
|
||||
|
||||
[float]
|
||||
[[ml-api-calendars]]
|
||||
=== /calendars/
|
||||
|
||||
* {ref}/ml-put-calendar.html[PUT /calendars/<calendar_id+++>+++]: Create a calendar
|
||||
* {ref}/ml-post-calendar-event.html[POST /calendars/<calendar_id+++>+++/events]: Add a scheduled event to a calendar
|
||||
* {ref}/ml-put-calendar-job.html[PUT /calendars/<calendar_id+++>+++/jobs/<job_id+++>+++]: Associate a job with a calendar
|
||||
* {ref}/ml-get-calendar.html[GET /calendars/<calendar_id+++>+++]: Get calendar details
|
||||
* {ref}/ml-get-calendar-event.html[GET /calendars/<calendar_id+++>+++/events]: Get scheduled event details
|
||||
* {ref}/ml-delete-calendar-event.html[DELETE /calendars/<calendar_id+++>+++/events/<event_id+++>+++]: Remove a scheduled event from a calendar
|
||||
* {ref}/ml-delete-calendar-job.html[DELETE /calendars/<calendar_id+++>+++/jobs/<job_id+++>+++]: Disassociate a job from a calendar
|
||||
* {ref}/ml-delete-calendar.html[DELETE /calendars/<calendar_id+++>+++]: Delete a calendar
|
||||
|
||||
[float]
|
||||
[[ml-api-datafeeds]]
|
||||
=== /datafeeds/
|
||||
|
||||
* {ref}/ml-put-datafeed.html[PUT /datafeeds/<datafeed_id+++>+++]: Create a {dfeed}
|
||||
* {ref}/ml-start-datafeed.html[POST /datafeeds/<datafeed_id>/_start]: Start a {dfeed}
|
||||
* {ref}/ml-get-datafeed.html[GET /datafeeds]: List {dfeeds}
|
||||
* {ref}/ml-get-datafeed.html[GET /datafeeds/<datafeed_id+++>+++]: Get {dfeed} details
|
||||
* {ref}/ml-get-datafeed-stats.html[GET /datafeeds/<datafeed_id>/_stats]: Get statistical information for {dfeeds}
|
||||
* {ref}/ml-preview-datafeed.html[GET /datafeeds/<datafeed_id>/_preview]: Get a preview of a {dfeed}
|
||||
* {ref}/ml-update-datafeed.html[POST /datafeeds/<datafeedid>/_update]: Update certain settings for a {dfeed}
|
||||
* {ref}/ml-stop-datafeed.html[POST /datafeeds/<datafeed_id>/_stop]: Stop a {dfeed}
|
||||
* {ref}/ml-delete-datafeed.html[DELETE /datafeeds/<datafeed_id+++>+++]: Delete {dfeed}
|
||||
|
||||
[float]
|
||||
[[ml-api-results]]
|
||||
=== /results/
|
||||
|
||||
* {ref}/ml-get-bucket.html[GET /results/buckets]: List the buckets in the results
|
||||
* {ref}/ml-get-bucket.html[GET /results/buckets/<bucket_id+++>+++]: Get bucket details
|
||||
* {ref}/ml-get-overall-buckets.html[GET /results/overall_buckets]: Get overall bucket results for multiple jobs
|
||||
* {ref}/ml-get-category.html[GET /results/categories]: List the categories in the results
|
||||
* {ref}/ml-get-category.html[GET /results/categories/<category_id+++>+++]: Get category details
|
||||
* {ref}/ml-get-influencer.html[GET /results/influencers]: Get influencer details
|
||||
* {ref}/ml-get-record.html[GET /results/records]: Get records from the results
|
||||
|
||||
[float]
|
||||
[[ml-api-snapshots]]
|
||||
=== /model_snapshots/
|
||||
|
||||
* {ref}/ml-get-snapshot.html[GET /model_snapshots]: List model snapshots
|
||||
* {ref}/ml-get-snapshot.html[GET /model_snapshots/<snapshot_id+++>+++]: Get model snapshot details
|
||||
* {ref}/ml-revert-snapshot.html[POST /model_snapshots/<snapshot_id>/_revert]: Revert a model snapshot
|
||||
* {ref}/ml-update-snapshot.html[POST /model_snapshots/<snapshot_id>/_update]: Update certain settings for a model snapshot
|
||||
* {ref}/ml-delete-snapshot.html[DELETE /model_snapshots/<snapshot_id+++>+++]: Delete a model snapshot
|
||||
|
||||
////
|
||||
[float]
|
||||
[[ml-api-validate]]
|
||||
=== /validate/
|
||||
|
||||
* {ref}/ml-valid-detector.html[POST /anomaly_detectors/_validate/detector]: Validate a detector
|
||||
* {ref}/ml-valid-job.html[POST /anomaly_detectors/_validate]: Validate a job
|
||||
////
|
|
@ -0,0 +1,9 @@
|
|||
[float]
|
||||
[[ml-nodes]]
|
||||
=== Machine learning nodes
|
||||
|
||||
A {ml} node is a node that has `xpack.ml.enabled` and `node.ml` set to `true`,
|
||||
which is the default behavior. If you set `node.ml` to `false`, the node can
|
||||
service API requests but it cannot run jobs. If you want to use {xpackml}
|
||||
features, there must be at least one {ml} node in your cluster. For more
|
||||
information about this setting, see <<xpack-settings>>.
|
|
@ -0,0 +1,26 @@
|
|||
[[ml-buckets]]
|
||||
=== Buckets
|
||||
++++
|
||||
<titleabbrev>Buckets</titleabbrev>
|
||||
++++
|
||||
|
||||
The {xpackml} features use the concept of a _bucket_ to divide the time series
|
||||
into batches for processing.
|
||||
|
||||
The _bucket span_ is part of the configuration information for a job. It defines
|
||||
the time interval that is used to summarize and model the data. This is
|
||||
typically between 5 minutes to 1 hour and it depends on your data characteristics.
|
||||
When you set the bucket span, take into account the granularity at which you
|
||||
want to analyze, the frequency of the input data, the typical duration of the
|
||||
anomalies, and the frequency at which alerting is required.
|
||||
|
||||
When you view your {ml} results, each bucket has an anomaly score. This score is
|
||||
a statistically aggregated and normalized view of the combined anomalousness of
|
||||
all the record results in the bucket. If you have more than one job, you can
|
||||
also obtain overall bucket results, which combine and correlate anomalies from
|
||||
multiple jobs into an overall score. When you view the results for job groups
|
||||
in {kib}, it provides the overall bucket scores.
|
||||
|
||||
For more information, see
|
||||
{ref}/ml-results-resource.html[Results Resources] and
|
||||
{ref}/ml-get-overall-buckets.html[Get Overall Buckets API].
|
|
@ -0,0 +1,40 @@
|
|||
[[ml-calendars]]
|
||||
=== Calendars and Scheduled Events
|
||||
|
||||
Sometimes there are periods when you expect unusual activity to take place,
|
||||
such as bank holidays, "Black Friday", or planned system outages. If you
|
||||
identify these events in advance, no anomalies are generated during that period.
|
||||
The {ml} model is not adversely affected and you do not receive spurious results.
|
||||
|
||||
You can create calendars and scheduled events in the **Settings** pane on the
|
||||
**Machine Learning** page in {kib} or by using {ref}/ml-apis.html[{ml} APIs].
|
||||
|
||||
A scheduled event must have a start time, end time, and description. In general,
|
||||
scheduled events are short in duration (typically lasting from a few hours to a
|
||||
day) and occur infrequently. If you have regularly occurring events, such as
|
||||
weekly maintenance periods, you do not need to create scheduled events for these
|
||||
circumstances; they are already handled by the {ml} analytics.
|
||||
|
||||
You can identify zero or more scheduled events in a calendar. Jobs can then
|
||||
subscribe to calendars and the {ml} analytics handle all subsequent scheduled
|
||||
events appropriately.
|
||||
|
||||
If you want to add multiple scheduled events at once, you can import an
|
||||
iCalendar (`.ics`) file in {kib} or a JSON file in the
|
||||
{ref}/ml-post-calendar-event.html[add events to calendar API].
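
For example, a hedged sketch of adding an event with `curl`; the calendar ID,
timestamps, and the exact field names in the body are assumptions based on the
required start time, end time, and description:

[source,sh]
--------------------------------------------------
curl -u elastic -X POST "http://localhost:9200/_xpack/ml/calendars/planned-outages/events" \
     -H 'Content-Type: application/json' -d '
{
  "events": [
    { "description": "quarterly maintenance", "start_time": 1514764800000, "end_time": 1514772000000 }
  ]
}'
--------------------------------------------------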
|
||||
|
||||
[NOTE]
|
||||
--
|
||||
|
||||
* You must identify scheduled events before your job analyzes the data for that
|
||||
time period. Machine learning results are not updated retroactively.
|
||||
* If your iCalendar file contains recurring events, only the first occurrence is
|
||||
imported.
|
||||
* Bucket results are generated during scheduled events but they have an
|
||||
anomaly score of zero. For more information about bucket results, see
|
||||
{ref}/ml-results-resource.html[Results Resources].
|
||||
* If you use long or frequent scheduled events, it might take longer for the
|
||||
{ml} analytics to learn to model your data and some anomalous behavior might be
|
||||
missed.
|
||||
|
||||
--
|
|
@ -0,0 +1,228 @@
|
|||
[[ml-configuring-categories]]
|
||||
=== Categorizing log messages
|
||||
|
||||
Application log events are often unstructured and contain variable data. For
|
||||
example:
|
||||
//Obtained from it_ops_new_app_logs.json
|
||||
[source,js]
|
||||
----------------------------------
|
||||
{"time":1454516381000,"message":"org.jdbi.v2.exceptions.UnableToExecuteStatementException: com.mysql.jdbc.exceptions.MySQLTimeoutException: Statement cancelled due to timeout or client request [statement:\"SELECT id, customer_id, name, force_disabled, enabled FROM customers\"]","type":"logs"}
|
||||
----------------------------------
|
||||
//NOTCONSOLE
|
||||
|
||||
You can use {ml} to observe the static parts of the message, cluster similar
|
||||
messages together, and classify them into message categories.
|
||||
|
||||
The {ml} model learns what volume and pattern is normal for each category over
|
||||
time. You can then detect anomalies and surface rare events or unusual types of
|
||||
messages by using count or rare functions. For example:
|
||||
|
||||
//Obtained from it_ops_new_app_logs.sh
|
||||
[source,js]
|
||||
----------------------------------
|
||||
PUT _xpack/ml/anomaly_detectors/it_ops_new_logs
|
||||
{
|
||||
"description" : "IT Ops Application Logs",
|
||||
"analysis_config" : {
|
||||
"categorization_field_name": "message", <1>
|
||||
"bucket_span":"30m",
|
||||
"detectors" :[{
|
||||
"function":"count",
|
||||
"by_field_name": "mlcategory", <2>
|
||||
"detector_description": "Unusual message counts"
|
||||
}],
|
||||
"categorization_filters":[ "\\[statement:.*\\]"]
|
||||
},
|
||||
"analysis_limits":{
|
||||
"categorization_examples_limit": 5
|
||||
},
|
||||
"data_description" : {
|
||||
"time_field":"time",
|
||||
"time_format": "epoch_ms"
|
||||
}
|
||||
}
|
||||
----------------------------------
|
||||
//CONSOLE
|
||||
<1> The `categorization_field_name` property indicates which field will be
|
||||
categorized.
|
||||
<2> The resulting categories are used in a detector by setting `by_field_name`,
|
||||
`over_field_name`, or `partition_field_name` to the keyword `mlcategory`. If you
|
||||
do not specify this keyword in one of those properties, the API request fails.
|
||||
|
||||
The optional `categorization_examples_limit` property specifies the
|
||||
maximum number of examples that are stored in memory and in the results data
|
||||
store for each category. The default value is `4`. Note that this setting does
|
||||
not affect the categorization; it just affects the list of visible examples. If
|
||||
you increase this value, more examples are available, but you must have more
|
||||
storage available. If you set this value to `0`, no examples are stored.
|
||||
|
||||
The optional `categorization_filters` property can contain an array of regular
|
||||
expressions. If a categorization field value matches the regular expression, the
|
||||
portion of the field that is matched is not taken into consideration when
|
||||
defining categories. The categorization filters are applied in the order they
|
||||
are listed in the job configuration, which allows you to disregard multiple
|
||||
sections of the categorization field value. In this example, we do not want the
detailed SQL statement to be considered in the message categorization, so this
categorization filter removes it before the categories are determined.
|
||||
|
||||
If your data is stored in {es}, you can create an advanced job with these same
|
||||
properties:
|
||||
|
||||
[role="screenshot"]
|
||||
image::images/ml-category-advanced.jpg["Advanced job configuration options related to categorization"]
|
||||
|
||||
NOTE: To add the `categorization_examples_limit` property, you must use the
|
||||
**Edit JSON** tab and copy the `analysis_limits` object from the API example.
|
||||
|
||||
[float]
|
||||
[[ml-configuring-analyzer]]
|
||||
==== Customizing the Categorization Analyzer
|
||||
|
||||
Categorization uses English dictionary words to identify log message categories.
|
||||
By default, it also uses English tokenization rules. For this reason, if you use
|
||||
the default categorization analyzer, only English language log messages are
|
||||
supported, as described in the <<ml-limitations>>.
|
||||
|
||||
You can, however, change the tokenization rules by customizing the way the
|
||||
categorization field values are interpreted. For example:
|
||||
|
||||
[source,js]
|
||||
----------------------------------
|
||||
PUT _xpack/ml/anomaly_detectors/it_ops_new_logs2
|
||||
{
|
||||
"description" : "IT Ops Application Logs",
|
||||
"analysis_config" : {
|
||||
"categorization_field_name": "message",
|
||||
"bucket_span":"30m",
|
||||
"detectors" :[{
|
||||
"function":"count",
|
||||
"by_field_name": "mlcategory",
|
||||
"detector_description": "Unusual message counts"
|
||||
}],
|
||||
"categorization_analyzer":{
|
||||
"char_filter": [
|
||||
{ "type": "pattern_replace", "pattern": "\\[statement:.*\\]" } <1>
|
||||
],
|
||||
"tokenizer": "ml_classic", <2>
|
||||
"filter": [
|
||||
{ "type" : "stop", "stopwords": [
|
||||
"Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday",
|
||||
"Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun",
|
||||
"January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December",
|
||||
"Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
|
||||
"GMT", "UTC"
|
||||
] } <3>
|
||||
]
|
||||
}
|
||||
},
|
||||
"analysis_limits":{
|
||||
"categorization_examples_limit": 5
|
||||
},
|
||||
"data_description" : {
|
||||
"time_field":"time",
|
||||
"time_format": "epoch_ms"
|
||||
}
|
||||
}
|
||||
----------------------------------
|
||||
//CONSOLE
|
||||
<1> The
|
||||
{ref}/analysis-pattern-replace-charfilter.html[`pattern_replace` character filter]
|
||||
here achieves exactly the same as the `categorization_filters` in the first
|
||||
example.
|
||||
<2> The `ml_classic` tokenizer works like the non-customizable tokenization
|
||||
that was used for categorization in older versions of machine learning. If you
|
||||
want the same categorization behavior as older versions, use this property value.
|
||||
<3> By default, English day or month words are filtered from log messages before
|
||||
categorization. If your logs are in a different language and contain
|
||||
dates, you might get better results by filtering the day or month words in your
|
||||
language.
|
||||
|
||||
The optional `categorization_analyzer` property allows even greater customization
|
||||
of how categorization interprets the categorization field value. It can refer to
|
||||
a built-in {es} analyzer or a combination of zero or more character filters,
|
||||
a tokenizer, and zero or more token filters.
|
||||
|
||||
The `ml_classic` tokenizer and the day and month stopword filter are more or less
|
||||
equivalent to the following analyzer, which is defined using only built-in {es}
|
||||
{ref}/analysis-tokenizers.html[tokenizers] and
|
||||
{ref}/analysis-tokenfilters.html[token filters]:
|
||||
|
||||
[source,js]
|
||||
----------------------------------
|
||||
PUT _xpack/ml/anomaly_detectors/it_ops_new_logs3
|
||||
{
|
||||
"description" : "IT Ops Application Logs",
|
||||
"analysis_config" : {
|
||||
"categorization_field_name": "message",
|
||||
"bucket_span":"30m",
|
||||
"detectors" :[{
|
||||
"function":"count",
|
||||
"by_field_name": "mlcategory",
|
||||
"detector_description": "Unusual message counts"
|
||||
}],
|
||||
"categorization_analyzer":{
|
||||
"tokenizer": {
|
||||
"type" : "simple_pattern_split",
|
||||
"pattern" : "[^-0-9A-Za-z_.]+" <1>
|
||||
},
|
||||
"filter": [
|
||||
{ "type" : "pattern_replace", "pattern": "^[0-9].*" }, <2>
|
||||
{ "type" : "pattern_replace", "pattern": "^[-0-9A-Fa-f.]+$" }, <3>
|
||||
{ "type" : "pattern_replace", "pattern": "^[^0-9A-Za-z]+" }, <4>
|
||||
{ "type" : "pattern_replace", "pattern": "[^0-9A-Za-z]+$" }, <5>
|
||||
{ "type" : "stop", "stopwords": [
|
||||
"",
|
||||
"Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday",
|
||||
"Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun",
|
||||
"January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December",
|
||||
"Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
|
||||
"GMT", "UTC"
|
||||
] }
|
||||
]
|
||||
}
|
||||
},
|
||||
"analysis_limits":{
|
||||
"categorization_examples_limit": 5
|
||||
},
|
||||
"data_description" : {
|
||||
"time_field":"time",
|
||||
"time_format": "epoch_ms"
|
||||
}
|
||||
}
|
||||
----------------------------------
|
||||
//CONSOLE
|
||||
<1> Tokens consist of hyphens, digits, letters, underscores, and dots.
|
||||
<2> By default, categorization ignores tokens that begin with a digit.
|
||||
<3> By default, categorization also ignores tokens that are hexadecimal numbers.
|
||||
<4> Underscores, hyphens, and dots are removed from the beginning of tokens.
|
||||
<5> Underscores, hyphens, and dots are also removed from the end of tokens.
|
||||
|
||||
The key difference between the default `categorization_analyzer` and this example
|
||||
analyzer is that using the `ml_classic` tokenizer is several times faster. The
|
||||
difference in behavior is that this custom analyzer does not include accented
|
||||
letters in tokens whereas the `ml_classic` tokenizer does, although that could
|
||||
be fixed by using more complex regular expressions.
|
||||
|
||||
For more information about the `categorization_analyzer` property, see
|
||||
{ref}/ml-job-resource.html#ml-categorizationanalyzer[Categorization Analyzer].
|
||||
|
||||
NOTE: To add the `categorization_analyzer` property in {kib}, you must use the
|
||||
**Edit JSON** tab and copy the `categorization_analyzer` object from one of the
|
||||
API examples above.
|
||||
|
||||
[float]
|
||||
[[ml-viewing-categories]]
|
||||
==== Viewing Categorization Results
|
||||
|
||||
After you open the job and start the {dfeed} or supply data to the job, you can
|
||||
view the categorization results in {kib}. For example:
|
||||
|
||||
[role="screenshot"]
|
||||
image::images/ml-category-anomalies.jpg["Categorization example in the Anomaly Explorer"]
|
||||
|
||||
For this type of job, the **Anomaly Explorer** contains extra information for
|
||||
each anomaly: the name of the category (for example, `mlcategory 11`) and
|
||||
examples of the messages in that category. In this case, you can use these
|
||||
details to investigate occurrences of unusually high message counts for specific
|
||||
message categories.
|
|
@ -0,0 +1,41 @@
|
|||
[[ml-configuring]]
|
||||
== Configuring Machine Learning
|
||||
|
||||
If you want to use {xpackml} features, there must be at least one {ml} node in
|
||||
your cluster and all master-eligible nodes must have {ml} enabled. By default,
|
||||
all nodes are {ml} nodes. For more information about these settings, see
|
||||
<<xpack-settings>>.
|
||||
|
||||
To use the {xpackml} features to analyze your data, you must create a job and
|
||||
send your data to that job.
|
||||
|
||||
* If your data is stored in {es}:
|
||||
|
||||
** You can create a {dfeed}, which retrieves data from {es} for analysis.
|
||||
** You can use {kib} to expedite the creation of jobs and {dfeeds}.
|
||||
|
||||
* If your data is not stored in {es}, you can
|
||||
{ref}/ml-post-data.html[POST data] from any source directly to an API, as sketched below.
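
A minimal sketch with `curl`; the job name, host, credentials, and the example
document are assumptions:

[source,sh]
--------------------------------------------------
curl -u elastic -X POST "http://localhost:9200/_xpack/ml/anomaly_detectors/my_job/_data" \
     -H 'Content-Type: application/json' \
     -d '{"time": 1454516381000, "responsetime": 123.4, "airline": "AAL"}'
--------------------------------------------------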
|
||||
|
||||
The results of {ml} analysis are stored in {es} and you can use {kib} to help
|
||||
you visualize and explore the results.
|
||||
|
||||
For a tutorial that walks you through these configuration steps,
|
||||
see <<ml-getting-started>>.
|
||||
|
||||
Though it is quite simple to analyze your data and provide quick {ml} results,
|
||||
gaining deep insights might require some additional planning and configuration.
|
||||
The scenarios in this section describe some best practices for generating useful
|
||||
{ml} results and insights from your data.
|
||||
|
||||
* <<ml-configuring-url>>
|
||||
* <<ml-configuring-aggregation>>
|
||||
* <<ml-configuring-categories>>
|
||||
* <<ml-configuring-pop>>
|
||||
* <<ml-configuring-transform>>
|
||||
|
||||
include::customurl.asciidoc[]
|
||||
include::aggregations.asciidoc[]
|
||||
include::categories.asciidoc[]
|
||||
include::populations.asciidoc[]
|
||||
include::transforms.asciidoc[]
|
|
@ -0,0 +1,104 @@
|
|||
[[ml-configuring-url]]
|
||||
=== Adding Custom URLs To Machine Learning Results
|
||||
|
||||
When you create an advanced job or edit any job in {kib}, you can optionally
|
||||
attach one or more custom URLs. You can also specify these custom settings when
|
||||
you create or update jobs by using the {ml} APIs.
|
||||
|
||||
The custom URLs provide links from the anomalies table in the Anomaly Explorer
|
||||
or Single Metric Viewer window in {kib} to custom dashboards or external
|
||||
websites. For example, you can define a custom URL that provides a way for users
|
||||
to drill down to the source data from the results set.
|
||||
|
||||
For each custom URL, you must supply the URL and a label, which is the link text
|
||||
that appears in the anomalies table.
|
||||
|
||||
[role="screenshot"]
|
||||
image::images/ml-customurl.jpg["Links in the Anomaly Explorer anomalies table"]
|
||||
|
||||
[float]
|
||||
==== String Substitution in Custom URLs
|
||||
|
||||
You can use dollar sign ($) delimited tokens in a custom URL. These tokens are
|
||||
substituted for the values of the corresponding fields in the anomaly records.
|
||||
For example, for a configured URL of
|
||||
`http://my.datastore.com/dashboards?user=$user_name$`, the value of the
|
||||
`user_name` field in the anomaly record is substituted into the `$user_name$`
|
||||
token when you click the link in the anomalies table.
|
||||
|
||||
NOTE: Not all fields in your source data exist in the anomaly results. If a
|
||||
field is specified in the detector as the `field_name`, `by_field_name`,
|
||||
`over_field_name`, or `partition_field_name`, for example, it can be used in a
|
||||
custom URL. A field that is only used in the `categorization_field_name`
|
||||
property, however, does not exist in the anomaly results.
|
||||
|
||||
The following keywords can also be used as tokens for string substitution in a
|
||||
custom URL: `$earliest$`, `$latest$`, `$mlcategoryregex$`, and `$mlcategoryterms$`.
|
||||
|
||||
The `$earliest$` and `$latest$` tokens pass the beginning and end of the time
|
||||
span of the selected anomaly to the target page. The tokens are substituted with
|
||||
date-time strings in ISO-8601 format. If you selected an interval of 1 hour for
|
||||
the anomalies table, these tokens use one hour on either side of the anomaly
|
||||
time as the earliest and latest times. The same is also true if the interval is
|
||||
set to `Auto` and a one hour interval was chosen.
|
||||
|
||||
The `$mlcategoryregex$` and `$mlcategoryterms$` tokens pertain to jobs where you
|
||||
are categorizing field values. For more information about this type of analysis,
|
||||
see <<ml-configuring-categories>>.
|
||||
|
||||
The `$mlcategoryregex$` token passes the regular expression value of the
|
||||
category of the selected anomaly, as identified by the value of the `mlcategory`
|
||||
field of the anomaly record.
|
||||
|
||||
The `$mlcategoryterms$` token likewise passes the terms value of the category of
|
||||
the selected anomaly. Each categorization term is prefixed by a plus (+)
|
||||
character, so that when the token is passed to a {kib} dashboard, the resulting
|
||||
dashboard query seeks a match for all of the terms of the category.
|
||||
|
||||
For example, the following API updates a `log_categories` job to add a custom
|
||||
URL that uses `$earliest$`, `$latest$`, and `$mlcategoryterms$` tokens:
|
||||
|
||||
[source,js]
|
||||
----------------------------------
|
||||
POST _xpack/ml/anomaly_detectors/log_categories/_update
|
||||
{
|
||||
"custom_settings": {
|
||||
"custom_urls": [
|
||||
{
|
||||
"url_name": "test-link1",
|
||||
"url_value": "http://localhost:5601/app/kibana#/discover?_g=(refreshInterval:(display:Off,pause:!f,value:0),time:(from:'$earliest$',mode:quick,to:'$latest$'))&_a=(columns:!(_source),index:AV3OWB68ue3Ht69t29aw,interval:auto,query:(query_string:(analyze_wildcard:!t,query:'$mlcategoryterms$')),sort:!(time,desc))"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
----------------------------------
|
||||
|
||||
When you click this custom URL in the anomalies table in {kib}, it opens up the
|
||||
Discover page and displays source data for the period when the anomaly occurred.
|
||||
Since this job was categorizing log messages, some `$mlcategoryterms$` token
|
||||
values that were passed to the target page for an example anomaly are as follows:
|
||||
|
||||
[role="screenshot"]
|
||||
image::images/ml-categoryterms.jpg["A query for category terms on the Discover page in {kib}"]
|
||||
|
||||
[TIP]
|
||||
===============================
|
||||
* The custom URL links in the anomaly tables use pop-ups. You must configure
|
||||
your web browser so that it does not block pop-up windows, or create an exception
|
||||
for your {kib} URL.
|
||||
* When creating a link to a {kib} dashboard, the URLs for dashboards can be very
|
||||
long. Be careful of typos, end of line characters, and URL encoding. Also ensure
|
||||
you use the appropriate index ID for the target {kib} index pattern.
|
||||
* If you use an influencer name for string substitution, keep in mind that it
|
||||
might not always be available in the analysis results and the URL is invalid in
|
||||
those cases. There is not always a statistically significant influencer for each
|
||||
anomaly.
|
||||
* The dates substituted for `$earliest$` and `$latest$` tokens are in
|
||||
ISO-8601 format and the target system must understand this format.
|
||||
* If the job performs an analysis against nested JSON fields, the tokens for
|
||||
string substitution can refer to these fields using dot notation. For example,
|
||||
`$cpu.total$`.
|
||||
* {es} source data mappings might make it difficult for the query string to work.
|
||||
Test the custom URL before saving the job configuration to check that it works
|
||||
as expected, particularly when using string substitution.
|
||||
===============================
|
|
@ -0,0 +1,40 @@
|
|||
[[ml-dfeeds]]
|
||||
=== {dfeeds-cap}
|
||||
|
||||
Machine learning jobs can analyze data that is stored in {es} or data that is
|
||||
sent from some other source via an API. _{dfeeds-cap}_ retrieve data from {es}
|
||||
for analysis, which is the simpler and more common scenario.
|
||||
|
||||
If you create jobs in {kib}, you must use {dfeeds}. When you create a job, you
|
||||
select an index pattern and {kib} configures the {dfeed} for you under the
|
||||
covers. If you use {ml} APIs instead, you can create a {dfeed} by using the
|
||||
{ref}/ml-put-datafeed.html[create {dfeeds} API] after you create a job. You can
|
||||
associate only one {dfeed} with each job.
|
||||
|
||||
For a description of all the {dfeed} properties, see
|
||||
{ref}/ml-datafeed-resource.html[Datafeed Resources].
|
||||
|
||||
To start retrieving data from {es}, you must start the {dfeed}. When you start
|
||||
it, you can optionally specify start and end times. If you do not specify an
|
||||
end time, the {dfeed} runs continuously. You can start and stop {dfeeds} in
|
||||
{kib} or use the {ref}/ml-start-datafeed.html[start {dfeeds}] and
|
||||
{ref}/ml-stop-datafeed.html[stop {dfeeds}] APIs. A {dfeed} can be started and
|
||||
stopped multiple times throughout its lifecycle.
|
||||
|
||||
[IMPORTANT]
|
||||
--
|
||||
When {security} is enabled, a {dfeed} stores the roles of the user who created
or updated the {dfeed} at that time. If the privileges that those roles grant
are later changed, the {dfeed} subsequently runs with the new permissions that
are associated with the roles. However, if the user is assigned different roles
after the {dfeed} is created or updated, the {dfeed} continues to run with the
roles that were originally stored.
|
||||
|
||||
One way to update the roles that are stored within the {dfeed} without changing
|
||||
any other settings is to submit an empty JSON document (`{}`) to the
|
||||
{ref}/ml-update-datafeed.html[update {dfeed} API].
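
For example, for a hypothetical {dfeed} named `datafeed-example-requests`:

[source,js]
--------------------------------------------------
// an empty body leaves the configuration unchanged but refreshes the stored roles
POST _xpack/ml/datafeeds/datafeed-example-requests/_update
{}
--------------------------------------------------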
|
||||
--
|
||||
|
||||
If the data that you want to analyze is not stored in {es}, you cannot use
|
||||
{dfeeds}. You can however send batches of data directly to the job by using the
|
||||
{ref}/ml-post-data.html[post data to jobs API].
|
|
@ -0,0 +1,69 @@
|
|||
[float]
|
||||
[[ml-forecasting]]
|
||||
=== Forecasting the Future
|
||||
|
||||
After the {xpackml} features create baselines of normal behavior for your data,
|
||||
you can use that information to extrapolate future behavior.
|
||||
|
||||
You can use a forecast to estimate a time series value at a specific future date.
|
||||
For example, you might want to determine how many users you can expect to visit
|
||||
your website next Sunday at 0900.
|
||||
|
||||
You can also use it to estimate the probability of a time series value occurring
|
||||
at a future date. For example, you might want to determine how likely it is that
|
||||
your disk utilization will reach 100% before the end of next week.
|
||||
|
||||
Each forecast has a unique ID, which you can use to distinguish between forecasts
|
||||
that you created at different times. You can create a forecast by using the
|
||||
{ref}/ml-forecast.html[Forecast Jobs API] or by using {kib}. For example:
|
||||
|
||||
|
||||
[role="screenshot"]
|
||||
image::images/ml-gs-job-forecast.jpg["Example screenshot from the Machine Learning Single Metric Viewer in Kibana"]
|
||||
|
||||
//For a more detailed walk-through of {xpackml} features, see <<ml-getting-started>>.
|
||||
|
||||
The yellow line in the chart represents the predicted data values. The
|
||||
shaded yellow area represents the bounds for the predicted values, which also
|
||||
gives an indication of the confidence of the predictions.
|
||||
|
||||
When you create a forecast, you specify its _duration_, which indicates how far
|
||||
the forecast extends beyond the last record that was processed. By default, the
|
||||
duration is 1 day. Typically the farther into the future that you forecast, the
|
||||
lower the confidence levels become (that is to say, the bounds increase).
|
||||
Eventually if the confidence levels are too low, the forecast stops.
|
||||
|
||||
You can also optionally specify when the forecast expires. By default, it
|
||||
expires in 14 days and is deleted automatically thereafter. You can specify a
|
||||
different expiration period by using the `expires_in` parameter in the
|
||||
{ref}/ml-forecast.html[Forecast Jobs API].
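
For example, the following sketch creates a forecast for a hypothetical job
named `total-requests` that extends three days beyond the last processed record
and expires after 30 days:

[source,js]
--------------------------------------------------
// job name, duration, and expiration period are illustrative
POST _xpack/ml/anomaly_detectors/total-requests/_forecast
{
  "duration": "3d",
  "expires_in": "30d"
}
--------------------------------------------------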
|
||||
|
||||
//Add examples of forecast_request_stats and forecast documents?
|
||||
|
||||
There are some limitations that affect your ability to create a forecast:
|
||||
|
||||
* You can generate only three forecasts concurrently. There is no limit to the
|
||||
number of forecasts that you retain. Existing forecasts are not overwritten when
|
||||
you create new forecasts. Rather, they are automatically deleted when they expire.
|
||||
* If you use an `over_field_name` property in your job (that is to say, it's a
|
||||
_population job_), you cannot create a forecast.
|
||||
* If you use any of the following analytical functions in your job, you
|
||||
cannot create a forecast:
|
||||
** `lat_long`
|
||||
** `rare` and `freq_rare`
|
||||
** `time_of_day` and `time_of_week`
|
||||
+
|
||||
--
|
||||
For more information about any of these functions, see <<ml-functions>>.
|
||||
--
|
||||
* Forecasts run concurrently with real-time {ml} analysis. That is to say, {ml}
|
||||
analysis does not stop while forecasts are generated. Forecasts can have an
|
||||
impact on {ml} jobs, however, especially in terms of memory usage. For this
|
||||
reason, forecasts run only if the model memory status is acceptable and the
|
||||
snapshot models for the forecast do not require more than 20 MB. If these memory
|
||||
limits are reached, consider splitting the job into multiple smaller jobs and
|
||||
creating forecasts for these.
|
||||
* The job must be open when you create a forecast. Otherwise, an error occurs.
|
||||
* If there is insufficient data to generate any meaningful predictions, an
|
||||
error occurs. In general, forecasts that are created early in the learning phase
|
||||
of the data analysis are less accurate.
|
|
@ -0,0 +1,79 @@
|
|||
[[ml-functions]]
|
||||
== Function Reference
|
||||
|
||||
The {xpackml} features include analysis functions that provide a wide variety of
|
||||
flexible ways to analyze data for anomalies.
|
||||
|
||||
When you create jobs, you specify one or more detectors, which define the type of
|
||||
analysis that needs to be done. If you are creating your job by using {ml} APIs,
|
||||
you specify the functions in
|
||||
{ref}/ml-job-resource.html#ml-detectorconfig[Detector Configuration Objects].
|
||||
If you are creating your job in {kib}, you specify the functions differently
|
||||
depending on whether you are creating single metric, multi-metric, or advanced
|
||||
jobs. For a demonstration of creating jobs in {kib}, see <<ml-getting-started>>.
|
||||
|
||||
Most functions detect anomalies in both low and high values. In statistical
|
||||
terminology, they apply a two-sided test. Some functions offer low and high
|
||||
variations (for example, `count`, `low_count`, and `high_count`). These variations
|
||||
apply one-sided tests, detecting anomalies only when the values are low or
|
||||
high, depending on which alternative is used.
|
||||
|
||||
//For some functions, you can optionally specify a field name in the
|
||||
//`by_field_name` property. The analysis then considers whether there is an
|
||||
//anomaly for one of more specific values of that field. In {kib}, use the
|
||||
//**Key Fields** field in multi-metric jobs or the **by_field_name** field in
|
||||
//advanced jobs.
|
||||
////
|
||||
TODO: Per Sophie, "This is incorrect... Split Data refers to a partition_field_name. Over fields can only be added in Adv Config...
|
||||
|
||||
Can you please remove the explanations for by/over/partition fields from the documentation for analytical functions. It's a complex topic and will be easier to review in a separate exercise."
|
||||
////
|
||||
|
||||
//For some functions, you can also optionally specify a field name in the
|
||||
//`over_field_name` property. This property shifts the analysis to be population-
|
||||
//or peer-based and uses the field to split the data. In {kib}, use the
|
||||
//**Split Data** field in multi-metric jobs or the **over_field_name** field in
|
||||
//advanced jobs.
|
||||
|
||||
//You can specify a `partition_field_name` with any function. The analysis is then
|
||||
//segmented with completely independent baselines for each value of that field.
|
||||
//In {kib}, use the **partition_field_name** field in advanced jobs.
|
||||
|
||||
You can specify a `summary_count_field_name` with any function except `metric`.
|
||||
When you use `summary_count_field_name`, the {ml} features expect the input
|
||||
data to be pre-aggregated. The value of the `summary_count_field_name` field
|
||||
must contain the count of raw events that were summarized. In {kib}, use the
|
||||
**summary_count_field_name** in advanced jobs. Analyzing aggregated input data
|
||||
provides a significant boost in performance. For more information, see
|
||||
<<ml-configuring-aggregation>>.
|
||||
|
||||
If your data is sparse, there may be gaps in the data, which means you might have
|
||||
empty buckets. You might want to treat these as anomalies or you might want these
|
||||
gaps to be ignored. Your decision depends on your use case and what is important
|
||||
to you. It also depends on which functions you use. The `sum` and `count`
|
||||
functions are strongly affected by empty buckets. For this reason, there are
|
||||
`non_null_sum` and `non_zero_count` functions, which are tolerant to sparse data.
|
||||
These functions effectively ignore empty buckets.
|
||||
|
||||
////
|
||||
Some functions can benefit from overlapping buckets. This improves the overall
|
||||
accuracy of the results but at the cost of a 2 bucket delay in seeing the results.
|
||||
|
||||
The table below provides a high-level summary of the analytical functions provided by the API. Each of the functions is described in detail over the following pages. Note the examples given in these pages use single Detector Configuration objects.
|
||||
////
|
||||
|
||||
* <<ml-count-functions>>
|
||||
* <<ml-geo-functions>>
|
||||
* <<ml-info-functions>>
|
||||
* <<ml-metric-functions>>
|
||||
* <<ml-rare-functions>>
|
||||
* <<ml-sum-functions>>
|
||||
* <<ml-time-functions>>
|
||||
|
||||
include::functions/count.asciidoc[]
|
||||
include::functions/geo.asciidoc[]
|
||||
include::functions/info.asciidoc[]
|
||||
include::functions/metric.asciidoc[]
|
||||
include::functions/rare.asciidoc[]
|
||||
include::functions/sum.asciidoc[]
|
||||
include::functions/time.asciidoc[]
|
|
@ -0,0 +1,214 @@
|
|||
[[ml-count-functions]]
|
||||
=== Count Functions
|
||||
|
||||
Count functions detect anomalies when the number of events in a bucket is
|
||||
anomalous.
|
||||
|
||||
Use `non_zero_count` functions if your data is sparse and you want to ignore
|
||||
cases where the bucket count is zero.
|
||||
|
||||
Use `distinct_count` functions to determine when the number of distinct values
|
||||
in one field is unusual, as opposed to the total count.
|
||||
|
||||
Use high-sided functions if you want to monitor unusually high event rates.
|
||||
Use low-sided functions if you want to look at drops in event rate.
|
||||
|
||||
The {xpackml} features include the following count functions:
|
||||
|
||||
* xref:ml-count[`count`, `high_count`, `low_count`]
|
||||
* xref:ml-nonzero-count[`non_zero_count`, `high_non_zero_count`, `low_non_zero_count`]
|
||||
* xref:ml-distinct-count[`distinct_count`, `high_distinct_count`, `low_distinct_count`]
|
||||
|
||||
[float]
|
||||
[[ml-count]]
|
||||
===== Count, High_count, Low_count
|
||||
|
||||
The `count` function detects anomalies when the number of events in a bucket is
|
||||
anomalous.
|
||||
|
||||
The `high_count` function detects anomalies when the count of events in a
|
||||
bucket are unusually high.
|
||||
|
||||
The `low_count` function detects anomalies when the count of events in a
|
||||
bucket are unusually low.
|
||||
|
||||
These functions support the following properties:
|
||||
|
||||
* `by_field_name` (optional)
|
||||
* `over_field_name` (optional)
|
||||
* `partition_field_name` (optional)
|
||||
|
||||
For more information about those properties,
|
||||
see {ref}/ml-job-resource.html#ml-detectorconfig[Detector Configuration Objects].
|
||||
|
||||
.Example 1: Analyzing events with the count function
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
{ "function" : "count" }
|
||||
--------------------------------------------------
|
||||
|
||||
This example is probably the simplest possible analysis. It identifies
|
||||
time buckets during which the overall count of events is higher or lower than
|
||||
usual.
|
||||
|
||||
When you use this function in a detector in your job, it models the event rate
|
||||
and detects when the event rate is unusual compared to its past behavior.
|
||||
|
||||
.Example 2: Analyzing errors with the high_count function
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
{
|
||||
"function" : "high_count",
|
||||
"by_field_name" : "error_code",
|
||||
"over_field_name": "user"
|
||||
}
|
||||
--------------------------------------------------
|
||||
|
||||
If you use this `high_count` function in a detector in your job, it
|
||||
models the event rate for each error code. It detects users that generate an
|
||||
unusually high count of error codes compared to other users.
|
||||
|
||||
|
||||
.Example 3: Analyzing status codes with the low_count function
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
{
|
||||
"function" : "low_count",
|
||||
"by_field_name" : "status_code"
|
||||
}
|
||||
--------------------------------------------------
|
||||
|
||||
In this example, the function detects when the count of events for a
|
||||
status code is lower than usual.
|
||||
|
||||
When you use this function in a detector in your job, it models the event rate
|
||||
for each status code and detects when a status code has an unusually low count
|
||||
compared to its past behavior.
|
||||
|
||||
.Example 4: Analyzing aggregated data with the count function
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
{
|
||||
"summary_count_field_name" : "events_per_min",
|
||||
"detectors" [
|
||||
{ "function" : "count" }
|
||||
]
|
||||
}
|
||||
--------------------------------------------------
|
||||
|
||||
If you are analyzing an aggregated `events_per_min` field, do not use a sum
|
||||
function (for example, `sum(events_per_min)`). Instead, use the count function
|
||||
and the `summary_count_field_name` property.
|
||||
//TO-DO: For more information, see <<aggreggations.asciidoc>>.
|
||||
|
||||
[float]
|
||||
[[ml-nonzero-count]]
|
||||
===== Non_zero_count, High_non_zero_count, Low_non_zero_count
|
||||
|
||||
The `non_zero_count` function detects anomalies when the number of events in a
|
||||
bucket is anomalous, but it ignores cases where the bucket count is zero. Use
|
||||
this function if you know your data is sparse or has gaps and the gaps are not
|
||||
important.
|
||||
|
||||
The `high_non_zero_count` function detects anomalies when the number of events
|
||||
in a bucket is unusually high and it ignores cases where the bucket count is
|
||||
zero.
|
||||
|
||||
The `low_non_zero_count` function detects anomalies when the number of events in
|
||||
a bucket is unusually low and it ignores cases where the bucket count is zero.
|
||||
|
||||
These functions support the following properties:
|
||||
|
||||
* `by_field_name` (optional)
|
||||
* `partition_field_name` (optional)
|
||||
|
||||
For more information about those properties,
|
||||
see {ref}/ml-job-resource.html#ml-detectorconfig[Detector Configuration Objects].
|
||||
|
||||
For example, if you have the following number of events per bucket:
|
||||
|
||||
========================================
|
||||
|
||||
1,22,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,43,31,0,0,0,0,0,0,0,0,0,0,0,0,2,1
|
||||
|
||||
========================================
|
||||
|
||||
The `non_zero_count` function models only the following data:
|
||||
|
||||
========================================
|
||||
|
||||
1,22,2,43,31,2,1
|
||||
|
||||
========================================
|
||||
|
||||
.Example 5: Analyzing signatures with the high_non_zero_count function
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
{
|
||||
"function" : "high_non_zero_count",
|
||||
"by_field_name" : "signaturename"
|
||||
}
|
||||
--------------------------------------------------
|
||||
|
||||
If you use this `high_non_zero_count` function in a detector in your job, it
|
||||
models the count of events for the `signaturename` field. It ignores any buckets
|
||||
where the count is zero and detects when a `signaturename` value has an
|
||||
unusually high count of events compared to its past behavior.
|
||||
|
||||
NOTE: Population analysis (using an `over_field_name` property value) is not
|
||||
supported for the `non_zero_count`, `high_non_zero_count`, and
|
||||
`low_non_zero_count` functions. If you want to do population analysis and your
|
||||
data is sparse, use the `count` functions, which are optimized for that scenario.
|
||||
|
||||
|
||||
[float]
|
||||
[[ml-distinct-count]]
|
||||
===== Distinct_count, High_distinct_count, Low_distinct_count
|
||||
|
||||
The `distinct_count` function detects anomalies where the number of distinct
|
||||
values in one field is unusual.
|
||||
|
||||
The `high_distinct_count` function detects unusually high numbers of distinct
|
||||
values in one field.
|
||||
|
||||
The `low_distinct_count` function detects unusually low numbers of distinct
|
||||
values in one field.
|
||||
|
||||
These functions support the following properties:
|
||||
|
||||
* `field_name` (required)
|
||||
* `by_field_name` (optional)
|
||||
* `over_field_name` (optional)
|
||||
* `partition_field_name` (optional)
|
||||
|
||||
For more information about those properties,
|
||||
see {ref}/ml-job-resource.html#ml-detectorconfig[Detector Configuration Objects].
|
||||
|
||||
.Example 6: Analyzing users with the distinct_count function
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
{
|
||||
"function" : "distinct_count",
|
||||
"field_name" : "user"
|
||||
}
|
||||
--------------------------------------------------
|
||||
|
||||
This `distinct_count` function detects when a system has an unusual number
|
||||
of logged in users. When you use this function in a detector in your job, it
|
||||
models the distinct count of users. It also detects when the distinct number of
|
||||
users is unusual compared to the past.
|
||||
|
||||
.Example 7: Analyzing ports with the high_distinct_count function
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
{
|
||||
"function" : "high_distinct_count",
|
||||
"field_name" : "dst_port",
|
||||
"over_field_name": "src_ip"
|
||||
}
|
||||
--------------------------------------------------
|
||||
|
||||
This example detects instances of port scanning. When you use this function in a
|
||||
detector in your job, it models the distinct count of ports. It also detects the
|
||||
`src_ip` values that connect to an unusually high number of different
|
||||
`dst_port` values compared to other `src_ip` values.
|
|
@ -0,0 +1,79 @@
|
|||
[[ml-geo-functions]]
|
||||
=== Geographic Functions
|
||||
|
||||
The geographic functions detect anomalies in the geographic location of the
|
||||
input data.
|
||||
|
||||
The {xpackml} features include the following geographic function: `lat_long`.
|
||||
|
||||
NOTE: You cannot create forecasts for jobs that contain geographic functions.
|
||||
|
||||
[float]
|
||||
[[ml-lat-long]]
|
||||
==== Lat_long
|
||||
|
||||
The `lat_long` function detects anomalies in the geographic location of the
|
||||
input data.
|
||||
|
||||
This function supports the following properties:
|
||||
|
||||
* `field_name` (required)
|
||||
* `by_field_name` (optional)
|
||||
* `over_field_name` (optional)
|
||||
* `partition_field_name` (optional)
|
||||
|
||||
For more information about those properties,
|
||||
see {ref}/ml-job-resource.html#ml-detectorconfig[Detector Configuration Objects].
|
||||
|
||||
.Example 1: Analyzing transactions with the lat_long function
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
{
|
||||
"function" : "lat_long",
|
||||
"field_name" : "transactionCoordinates",
|
||||
"by_field_name" : "creditCardNumber"
|
||||
}
|
||||
--------------------------------------------------
|
||||
|
||||
If you use this `lat_long` function in a detector in your job, it
|
||||
detects anomalies where the geographic location of a credit card transaction is
|
||||
unusual for a particular customer’s credit card. An anomaly might indicate fraud.
|
||||
|
||||
IMPORTANT: The `field_name` that you supply must be a single string that contains
|
||||
two comma-separated numbers of the form `latitude,longitude`. The `latitude` and
|
||||
`longitude` must be in the range -180 to 180 and represent a point on the
|
||||
surface of the Earth.
|
||||
|
||||
For example, JSON data might contain the following transaction coordinates:
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
{
|
||||
"time": 1460464275,
|
||||
"transactionCoordinates": "40.7,-74.0",
|
||||
"creditCardNumber": "1234123412341234"
|
||||
}
|
||||
--------------------------------------------------
|
||||
|
||||
In {es}, location data is likely to be stored in `geo_point` fields. For more
|
||||
information, see {ref}/geo-point.html[Geo-point datatype]. This data type is not
|
||||
supported natively in {xpackml} features. You can, however, use Painless scripts
|
||||
in `script_fields` in your {dfeed} to transform the data into an appropriate
|
||||
format. For example, the following Painless script transforms
|
||||
`"coords": {"lat" : 41.44, "lon":90.5}` into `"lat-lon": "41.44,90.5"`:
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
{
|
||||
"script_fields": {
|
||||
"lat-lon": {
|
||||
"script": {
|
||||
"source": "doc['coords'].lat + ',' + doc['coords'].lon",
|
||||
"lang": "painless"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
--------------------------------------------------
|
||||
|
||||
For more information, see <<ml-configuring-transform>>.
|
|
@ -0,0 +1,87 @@
|
|||
[[ml-info-functions]]
|
||||
=== Information Content Functions
|
||||
|
||||
The information content functions detect anomalies in the amount of information
|
||||
that is contained in strings within a bucket. These functions can be used as
|
||||
a more sophisticated method to identify incidences of data exfiltration or
|
||||
C2C activity, when analyzing the size in bytes of the data might not be sufficient.
|
||||
|
||||
The {xpackml} features include the following information content functions:
|
||||
|
||||
* `info_content`, `high_info_content`, `low_info_content`
|
||||
|
||||
[float]
|
||||
[[ml-info-content]]
|
||||
==== Info_content, High_info_content, Low_info_content
|
||||
|
||||
The `info_content` function detects anomalies in the amount of information that
|
||||
is contained in strings in a bucket.
|
||||
|
||||
If you want to monitor for unusually high amounts of information,
|
||||
use `high_info_content`.
|
||||
If you want to look at drops in information content, use `low_info_content`.
|
||||
|
||||
These functions support the following properties:
|
||||
|
||||
* `field_name` (required)
|
||||
* `by_field_name` (optional)
|
||||
* `over_field_name` (optional)
|
||||
* `partition_field_name` (optional)
|
||||
|
||||
For more information about those properties, see
|
||||
{ref}/ml-job-resource.html#ml-detectorconfig[Detector Configuration Objects].
|
||||
|
||||
.Example 1: Analyzing subdomain strings with the info_content function
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
{
|
||||
"function" : "info_content",
|
||||
"field_name" : "subdomain",
|
||||
"over_field_name" : "highest_registered_domain"
|
||||
}
|
||||
--------------------------------------------------
|
||||
|
||||
If you use this `info_content` function in a detector in your job, it models
|
||||
information that is present in the `subdomain` string. It detects anomalies
|
||||
where the information content is unusual compared to the other
|
||||
`highest_registered_domain` values. An anomaly could indicate an abuse of the
|
||||
DNS protocol, such as malicious command and control activity.
|
||||
|
||||
NOTE: In this example, both high and low values are considered anomalous.
|
||||
In many use cases, the `high_info_content` function is often a more appropriate
|
||||
choice.
|
||||
|
||||
.Example 2: Analyzing query strings with the high_info_content function
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
{
|
||||
"function" : "high_info_content",
|
||||
"field_name" : "query",
|
||||
"over_field_name" : "src_ip"
|
||||
}
|
||||
--------------------------------------------------
|
||||
|
||||
If you use this `high_info_content` function in a detector in your job, it
|
||||
models information content that is held in the DNS query string. It detects
|
||||
`src_ip` values where the information content is unusually high compared to
|
||||
other `src_ip` values. This example is similar to the example for the
|
||||
`info_content` function, but it reports anomalies only where the amount of
|
||||
information content is higher than expected.
|
||||
|
||||
.Example 3: Analyzing message strings with the low_info_content function
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
{
|
||||
"function" : "low_info_content",
|
||||
"field_name" : "message",
|
||||
"by_field_name" : "logfilename"
|
||||
}
|
||||
--------------------------------------------------
|
||||
|
||||
If you use this `low_info_content` function in a detector in your job, it models
|
||||
information content that is present in the message string for each
|
||||
`logfilename`. It detects anomalies where the information content is low
|
||||
compared to its past behavior. For example, this function detects unusually low
|
||||
amounts of information in a collection of rolling log files. Low information
|
||||
might indicate that a process has entered an infinite loop or that logging
|
||||
features have been disabled.
|
|
@ -0,0 +1,310 @@
|
|||
[[ml-metric-functions]]
|
||||
=== Metric Functions
|
||||
|
||||
The metric functions include functions such as mean, min and max. These values
|
||||
are calculated for each bucket. Field values that cannot be converted to
|
||||
double precision floating point numbers are ignored.
|
||||
|
||||
The {xpackml} features include the following metric functions:
|
||||
|
||||
* <<ml-metric-min,`min`>>
|
||||
* <<ml-metric-max,`max`>>
|
||||
* xref:ml-metric-median[`median`, `high_median`, `low_median`]
|
||||
* xref:ml-metric-mean[`mean`, `high_mean`, `low_mean`]
|
||||
* <<ml-metric-metric,`metric`>>
|
||||
* xref:ml-metric-varp[`varp`, `high_varp`, `low_varp`]
|
||||
|
||||
[float]
|
||||
[[ml-metric-min]]
|
||||
==== Min
|
||||
|
||||
The `min` function detects anomalies in the arithmetic minimum of a value.
|
||||
The minimum value is calculated for each bucket.
|
||||
|
||||
High- and low-sided functions are not applicable.
|
||||
|
||||
This function supports the following properties:
|
||||
|
||||
* `field_name` (required)
|
||||
* `by_field_name` (optional)
|
||||
* `over_field_name` (optional)
|
||||
* `partition_field_name` (optional)
|
||||
|
||||
For more information about those properties, see
|
||||
{ref}/ml-job-resource.html#ml-detectorconfig[Detector Configuration Objects].
|
||||
|
||||
.Example 1: Analyzing minimum transactions with the min function
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
{
|
||||
"function" : "min",
|
||||
"field_name" : "amt",
|
||||
"by_field_name" : "product"
|
||||
}
|
||||
--------------------------------------------------
|
||||
|
||||
If you use this `min` function in a detector in your job, it detects where the
|
||||
smallest transaction is lower than previously observed. You can use this
|
||||
function to detect items for sale at unintentionally low prices due to data
|
||||
entry mistakes. It models the minimum amount for each product over time.
|
||||
|
||||
[float]
|
||||
[[ml-metric-max]]
|
||||
==== Max
|
||||
|
||||
The `max` function detects anomalies in the arithmetic maximum of a value.
|
||||
The maximum value is calculated for each bucket.
|
||||
|
||||
High- and low-sided functions are not applicable.
|
||||
|
||||
This function supports the following properties:
|
||||
|
||||
* `field_name` (required)
|
||||
* `by_field_name` (optional)
|
||||
* `over_field_name` (optional)
|
||||
* `partition_field_name` (optional)
|
||||
|
||||
For more information about those properties, see
|
||||
{ref}/ml-job-resource.html#ml-detectorconfig[Detector Configuration Objects].
|
||||
|
||||
.Example 2: Analyzing maximum response times with the max function
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
{
|
||||
"function" : "max",
|
||||
"field_name" : "responsetime",
|
||||
"by_field_name" : "application"
|
||||
}
|
||||
--------------------------------------------------
|
||||
|
||||
If you use this `max` function in a detector in your job, it detects where the
|
||||
longest `responsetime` is longer than previously observed. You can use this
|
||||
function to detect applications that have `responsetime` values that are
|
||||
unusually lengthy. It models the maximum `responsetime` for each application
|
||||
over time and detects when the longest `responsetime` is unusually long compared
|
||||
to previous applications.
|
||||
|
||||
.Example 3: Two detectors with max and high_mean functions
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
{
|
||||
"function" : "max",
|
||||
"field_name" : "responsetime",
|
||||
"by_field_name" : "application"
|
||||
},
|
||||
{
|
||||
"function" : "high_mean",
|
||||
"field_name" : "responsetime",
|
||||
"by_field_name" : "application"
|
||||
}
|
||||
--------------------------------------------------
|
||||
|
||||
The analysis in the previous example can be performed alongside `high_mean`
functions by application. By combining detectors and using the same influencer,
this job can detect both unusually long individual response times and unusually
high average response times for each bucket.
|
||||
|
||||
[float]
|
||||
[[ml-metric-median]]
|
||||
==== Median, High_median, Low_median
|
||||
|
||||
The `median` function detects anomalies in the statistical median of a value.
|
||||
The median value is calculated for each bucket.
|
||||
|
||||
If you want to monitor unusually high median values, use the `high_median`
|
||||
function.
|
||||
|
||||
If you are just interested in unusually low median values, use the `low_median`
|
||||
function.
|
||||
|
||||
These functions support the following properties:
|
||||
|
||||
* `field_name` (required)
|
||||
* `by_field_name` (optional)
|
||||
* `over_field_name` (optional)
|
||||
* `partition_field_name` (optional)
|
||||
|
||||
For more information about those properties, see
|
||||
{ref}/ml-job-resource.html#ml-detectorconfig[Detector Configuration Objects].
|
||||
|
||||
.Example 4: Analyzing response times with the median function
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
{
|
||||
"function" : "median",
|
||||
"field_name" : "responsetime",
|
||||
"by_field_name" : "application"
|
||||
}
|
||||
--------------------------------------------------
|
||||
|
||||
If you use this `median` function in a detector in your job, it models the
|
||||
median `responsetime` for each application over time. It detects when the median
|
||||
`responsetime` is unusual compared to previous `responsetime` values.
|
||||
|
||||
[float]
|
||||
[[ml-metric-mean]]
|
||||
==== Mean, High_mean, Low_mean
|
||||
|
||||
The `mean` function detects anomalies in the arithmetic mean of a value.
|
||||
The mean value is calculated for each bucket.
|
||||
|
||||
If you want to monitor unusually high average values, use the `high_mean`
|
||||
function.
|
||||
|
||||
If you are just interested in unusually low average values, use the `low_mean`
|
||||
function.
|
||||
|
||||
These functions support the following properties:
|
||||
|
||||
* `field_name` (required)
|
||||
* `by_field_name` (optional)
|
||||
* `over_field_name` (optional)
|
||||
* `partition_field_name` (optional)
|
||||
|
||||
For more information about those properties, see
|
||||
{ref}/ml-job-resource.html#ml-detectorconfig[Detector Configuration Objects].
|
||||
|
||||
.Example 5: Analyzing response times with the mean function
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
{
|
||||
"function" : "mean",
|
||||
"field_name" : "responsetime",
|
||||
"by_field_name" : "application"
|
||||
}
|
||||
--------------------------------------------------
|
||||
|
||||
If you use this `mean` function in a detector in your job, it models the mean
|
||||
`responsetime` for each application over time. It detects when the mean
|
||||
`responsetime` is unusual compared to previous `responsetime` values.
|
||||
|
||||
.Example 6: Analyzing response times with the high_mean function
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
{
|
||||
"function" : "high_mean",
|
||||
"field_name" : "responsetime",
|
||||
"by_field_name" : "application"
|
||||
}
|
||||
--------------------------------------------------
|
||||
|
||||
If you use this `high_mean` function in a detector in your job, it models the
|
||||
mean `responsetime` for each application over time. It detects when the mean
|
||||
`responsetime` is unusually high compared to previous `responsetime` values.
|
||||
|
||||
.Example 7: Analyzing response times with the low_mean function
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
{
|
||||
"function" : "low_mean",
|
||||
"field_name" : "responsetime",
|
||||
"by_field_name" : "application"
|
||||
}
|
||||
--------------------------------------------------
|
||||
|
||||
If you use this `low_mean` function in a detector in your job, it models the
|
||||
mean `responsetime` for each application over time. It detects when the mean
|
||||
`responsetime` is unusually low compared to previous `responsetime` values.
|
||||
|
||||
[float]
|
||||
[[ml-metric-metric]]
|
||||
==== Metric
|
||||
|
||||
The `metric` function combines `min`, `max`, and `mean` functions. You can use
|
||||
it as a shorthand for a combined analysis. If you do not specify a function in
|
||||
a detector, this is the default function.
|
||||
//TBD: Is that default behavior still true?
|
||||
|
||||
High- and low-sided functions are not applicable. You cannot use this function
|
||||
when a `summary_count_field_name` is specified.
|
||||
|
||||
This function supports the following properties:
|
||||
|
||||
* `field_name` (required)
|
||||
* `by_field_name` (optional)
|
||||
* `over_field_name` (optional)
|
||||
* `partition_field_name` (optional)
|
||||
|
||||
For more information about those properties, see
|
||||
{ref}/ml-job-resource.html#ml-detectorconfig[Detector Configuration Objects].
|
||||
|
||||
.Example 8: Analyzing response times with the metric function
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
{
|
||||
"function" : "metric",
|
||||
"field_name" : "responsetime",
|
||||
"by_field_name" : "application"
|
||||
}
|
||||
--------------------------------------------------
|
||||
|
||||
If you use this `metric` function in a detector in your job, it models the
|
||||
mean, min, and max `responsetime` for each application over time. It detects
|
||||
when the mean, min, or max `responsetime` is unusual compared to previous
|
||||
`responsetime` values.
|
||||
|
||||
[float]
|
||||
[[ml-metric-varp]]
|
||||
==== Varp, High_varp, Low_varp
|
||||
|
||||
The `varp` function detects anomalies in the variance of a value, which is a
|
||||
measure of the variability and spread in the data.
|
||||
|
||||
If you want to monitor unusually high variance, use the `high_varp` function.
|
||||
|
||||
If you are just interested in unusually low variance, use the `low_varp` function.
|
||||
|
||||
These functions support the following properties:
|
||||
|
||||
* `field_name` (required)
|
||||
* `by_field_name` (optional)
|
||||
* `over_field_name` (optional)
|
||||
* `partition_field_name` (optional)
|
||||
|
||||
For more information about those properties, see
|
||||
{ref}/ml-job-resource.html#ml-detectorconfig[Detector Configuration Objects].
|
||||
|
||||
.Example 9: Analyzing response times with the varp function
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
{
|
||||
"function" : "varp",
|
||||
"field_name" : "responsetime",
|
||||
"by_field_name" : "application"
|
||||
}
|
||||
--------------------------------------------------
|
||||
|
||||
If you use this `varp` function in a detector in your job, it models the
|
||||
variance in values of `responsetime` for each application over time. It detects
|
||||
when the variance in `responsetime` is unusual compared to past application
|
||||
behavior.
|
||||
|
||||
.Example 10: Analyzing response times with the high_varp function
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
{
|
||||
"function" : "high_varp",
|
||||
"field_name" : "responsetime",
|
||||
"by_field_name" : "application"
|
||||
}
|
||||
--------------------------------------------------
|
||||
|
||||
If you use this `high_varp` function in a detector in your job, it models the
|
||||
variance in values of `responsetime` for each application over time. It detects
|
||||
when the variance in `responsetime` is unusual compared to past application
|
||||
behavior.
|
||||
|
||||
.Example 11: Analyzing response times with the low_varp function
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
{
|
||||
"function" : "low_varp",
|
||||
"field_name" : "responsetime",
|
||||
"by_field_name" : "application"
|
||||
}
|
||||
--------------------------------------------------
|
||||
|
||||
If you use this `low_varp` function in a detector in your job, it models the
|
||||
variance in values of `responsetime` for each application over time. It detects
|
||||
when the variance in `responsetime` is unusual compared to past application
|
||||
behavior.
|
|
@ -0,0 +1,128 @@
|
|||
[[ml-rare-functions]]
|
||||
=== Rare Functions
|
||||
|
||||
The rare functions detect values that occur rarely in time or rarely for a
|
||||
population.
|
||||
|
||||
The `rare` analysis detects anomalies according to the number of distinct rare
|
||||
values. This differs from `freq_rare`, which detects anomalies according to the
|
||||
number of times (frequency) rare values occur.
|
||||
|
||||
[NOTE]
|
||||
====
|
||||
* The `rare` and `freq_rare` functions should not be used in conjunction with
|
||||
`exclude_frequent`.
|
||||
* You cannot create forecasts for jobs that contain `rare` or `freq_rare`
|
||||
functions.
|
||||
* Shorter bucket spans (less than 1 hour, for example) are recommended when
|
||||
looking for rare events. The functions model whether something happens in a
|
||||
bucket at least once. With longer bucket spans, it is more likely that
|
||||
entities will be seen in a bucket and therefore they appear less rare.
|
||||
Picking the ideal bucket span depends on the characteristics of the data,
|
||||
with shorter bucket spans typically being measured in minutes, not hours.
|
||||
* To model rare data, a learning period of at least 20 buckets is required
|
||||
for typical data.
|
||||
====
|
||||
|
||||
The {xpackml} features include the following rare functions:
|
||||
|
||||
* <<ml-rare,`rare`>>
|
||||
* <<ml-freq-rare,`freq_rare`>>
|
||||
|
||||
|
||||
[float]
|
||||
[[ml-rare]]
|
||||
==== Rare
|
||||
|
||||
The `rare` function detects values that occur rarely in time or rarely for a
|
||||
population. It detects anomalies according to the number of distinct rare values.
|
||||
|
||||
This function supports the following properties:
|
||||
|
||||
* `by_field_name` (required)
|
||||
* `over_field_name` (optional)
|
||||
* `partition_field_name` (optional)
|
||||
|
||||
For more information about those properties, see
|
||||
{ref}/ml-job-resource.html#ml-detectorconfig[Detector Configuration Objects].
|
||||
|
||||
.Example 1: Analyzing status codes with the rare function
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
{
|
||||
"function" : "rare",
|
||||
"by_field_name" : "status"
|
||||
}
|
||||
--------------------------------------------------
|
||||
|
||||
If you use this `rare` function in a detector in your job, it detects values
|
||||
that are rare in time. It models status codes that occur over time and detects
|
||||
when rare status codes occur compared to the past. For example, you can detect
|
||||
status codes in a web access log that have never (or rarely) occurred before.
|
||||
|
||||
.Example 2: Analyzing status codes in a population with the rare function
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
{
|
||||
"function" : "rare",
|
||||
"by_field_name" : "status",
|
||||
"over_field_name" : "clientip"
|
||||
}
|
||||
--------------------------------------------------
|
||||
|
||||
If you use this `rare` function in a detector in your job, it detects values
|
||||
that are rare in a population. It models status code and client IP interactions
|
||||
that occur. It defines a rare status code as one that occurs for few client IP
|
||||
values compared to the population. It detects client IP values that experience
|
||||
one or more distinct rare status codes compared to the population. For example,
|
||||
in a web access log, a `clientip` that experiences the highest number of
|
||||
different rare status codes compared to the population is regarded as highly
|
||||
anomalous. This analysis is based on the number of different status code values,
|
||||
not the count of occurrences.
|
||||
|
||||
NOTE: To define a status code as rare, the {xpackml} features look at the number
|
||||
of distinct status codes that occur, not the number of times the status code
|
||||
occurs. If a single client IP experiences a single unique status code, this
|
||||
is rare, even if it occurs for that client IP in every bucket.
|
||||
|
||||
[float]
|
||||
[[ml-freq-rare]]
|
||||
==== Freq_rare
|
||||
|
||||
The `freq_rare` function detects values that occur rarely for a population.
|
||||
It detects anomalies according to the number of times (frequency) that rare
|
||||
values occur.
|
||||
|
||||
This function supports the following properties:
|
||||
|
||||
* `by_field_name` (required)
|
||||
* `over_field_name` (required)
|
||||
* `partition_field_name` (optional)
|
||||
|
||||
For more information about those properties, see
|
||||
{ref}/ml-job-resource.html#ml-detectorconfig[Detector Configuration Objects].
|
||||
|
||||
.Example 3: Analyzing URI values in a population with the freq_rare function
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
{
|
||||
"function" : "freq_rare",
|
||||
"by_field_name" : "uri",
|
||||
"over_field_name" : "clientip"
|
||||
}
|
||||
--------------------------------------------------
|
||||
|
||||
If you use this `freq_rare` function in a detector in your job, it
|
||||
detects values that are frequently rare in a population. It models URI paths and
|
||||
client IP interactions that occur. It defines a rare URI path as one that is
|
||||
visited by few client IP values compared to the population. It detects the
|
||||
client IP values that experience many interactions with rare URI paths compared
|
||||
to the population. For example, in a web access log, a client IP that visits
|
||||
one or more rare URI paths many times compared to the population is regarded as
|
||||
highly anomalous. This analysis is based on the count of interactions with rare
|
||||
URI paths, not the number of different URI path values.
|
||||
|
||||
NOTE: To define a URI path as rare, the analytics consider the number of
|
||||
distinct values that occur and not the number of times the URI path occurs.
|
||||
If a single client IP visits a single unique URI path, this is rare, even if it
|
||||
occurs for that client IP in every bucket.
|
|
@ -0,0 +1,119 @@
|
|||
|
||||
[[ml-sum-functions]]
|
||||
=== Sum Functions
|
||||
|
||||
The sum functions detect anomalies when the sum of a field in a bucket is anomalous.
|
||||
|
||||
If you want to monitor unusually high totals, use high-sided functions.
|
||||
|
||||
If you want to look at drops in totals, use low-sided functions.
|
||||
|
||||
If your data is sparse, use `non_null_sum` functions. Buckets without values are
|
||||
ignored; buckets with a zero value are analyzed.
|
||||
|
||||
The {xpackml} features include the following sum functions:
|
||||
|
||||
* xref:ml-sum[`sum`, `high_sum`, `low_sum`]
|
||||
* xref:ml-nonnull-sum[`non_null_sum`, `high_non_null_sum`, `low_non_null_sum`]
|
||||
|
||||
////
|
||||
TBD: Incorporate from prelert docs?:
|
||||
Input data may contain pre-calculated fields giving the total count of some value e.g. transactions per minute.
|
||||
Ensure you are familiar with our advice on Summarization of Input Data, as this is likely to provide
|
||||
a more appropriate method to using the sum function.
|
||||
////
|
||||
|
||||
[float]
|
||||
[[ml-sum]]
|
||||
==== Sum, High_sum, Low_sum
|
||||
|
||||
The `sum` function detects anomalies where the sum of a field in a bucket is
|
||||
anomalous.
|
||||
|
||||
If you want to monitor unusually high sum values, use the `high_sum` function.
|
||||
|
||||
If you want to monitor unusually low sum values, use the `low_sum` function.
|
||||
|
||||
These functions support the following properties:
|
||||
|
||||
* `field_name` (required)
|
||||
* `by_field_name` (optional)
|
||||
* `over_field_name` (optional)
|
||||
* `partition_field_name` (optional)
|
||||
|
||||
For more information about those properties, see
|
||||
{ref}/ml-job-resource.html#ml-detectorconfig[Detector Configuration Objects].
|
||||
|
||||
.Example 1: Analyzing total expenses with the sum function
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
{
|
||||
"function" : "sum",
|
||||
"field_name" : "expenses",
|
||||
"by_field_name" : "costcenter",
|
||||
"over_field_name" : "employee"
|
||||
}
|
||||
--------------------------------------------------
|
||||
|
||||
If you use this `sum` function in a detector in your job, it
|
||||
models total expenses per employee for each cost center. For each time bucket,
|
||||
it detects when an employee’s expenses are unusual for a cost center compared
|
||||
to other employees.
|
||||
|
||||
.Example 2: Analyzing total bytes with the high_sum function
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
{
|
||||
"function" : "high_sum",
|
||||
"field_name" : "cs_bytes",
|
||||
"over_field_name" : "cs_host"
|
||||
}
|
||||
--------------------------------------------------
|
||||
|
||||
If you use this `high_sum` function in a detector in your job, it
|
||||
models total `cs_bytes`. It detects `cs_hosts` that transfer unusually high
|
||||
volumes compared to other `cs_hosts`. This example looks for volumes of data
|
||||
transferred from a client to a server on the internet that are unusual compared
|
||||
to other clients. This scenario could be useful to detect data exfiltration or
|
||||
to find users that are abusing internet privileges.
|
||||
|
||||
[float]
|
||||
[[ml-nonnull-sum]]
|
||||
==== Non_null_sum, High_non_null_sum, Low_non_null_sum
|
||||
|
||||
The `non_null_sum` function is useful if your data is sparse. Buckets without
|
||||
values are ignored and buckets with a zero value are analyzed.
|
||||
|
||||
If you want to monitor unusually high totals, use the `high_non_null_sum`
|
||||
function.
|
||||
|
||||
If you want to look at drops in totals, use the `low_non_null_sum` function.
|
||||
|
||||
These functions support the following properties:
|
||||
|
||||
* `field_name` (required)
|
||||
* `by_field_name` (optional)
|
||||
* `partition_field_name` (optional)
|
||||
|
||||
For more information about those properties, see
|
||||
{ref}/ml-job-resource.html#ml-detectorconfig[Detector Configuration Objects].
|
||||
|
||||
NOTE: Population analysis (that is to say, use of the `over_field_name` property)
|
||||
is not applicable for these functions.
|
||||
|
||||
.Example 3: Analyzing employee approvals with the high_non_null_sum function
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
{
|
||||
"function" : "high_non_null_sum",
|
||||
"fieldName" : "amount_approved",
|
||||
"byFieldName" : "employee"
|
||||
}
|
||||
--------------------------------------------------
|
||||
|
||||
If you use this `high_non_null_sum` function in a detector in your job, it
|
||||
models the total `amount_approved` for each employee. It ignores any buckets
|
||||
where the amount is null. It detects employees who approve unusually high
|
||||
amounts compared to their past behavior.
|
||||
//For this credit control system analysis, using non_null_sum will ignore
|
||||
//periods where the employees are not active on the system.
|
|
@ -0,0 +1,99 @@
|
|||
[[ml-time-functions]]
|
||||
=== Time Functions
|
||||
|
||||
The time functions detect events that happen at unusual times, either of the day
|
||||
or of the week. These functions can be used to find unusual patterns of behavior,
|
||||
typically associated with suspicious user activity.
|
||||
|
||||
The {xpackml} features include the following time functions:
|
||||
|
||||
* <<ml-time-of-day,`time_of_day`>>
|
||||
* <<ml-time-of-week,`time_of_week`>>
|
||||
|
||||
|
||||
[NOTE]
|
||||
====
|
||||
* You cannot create forecasts for jobs that contain time functions.
|
||||
* The `time_of_day` function is not aware of the difference between days, for instance
|
||||
work days and weekends. When modeling different days, use the `time_of_week` function.
|
||||
In general, the `time_of_week` function is more suited to modeling the behavior of people
|
||||
rather than machines, as people vary their behavior according to the day of the week.
|
||||
* Shorter bucket spans (for example, 10 minutes) are recommended when performing a
|
||||
`time_of_day` or `time_of_week` analysis. The times of the events being modeled are not
|
||||
affected by the bucket span, but a shorter bucket span enables quicker alerting on unusual
|
||||
events.
|
||||
* Unusual events are flagged based on the previous pattern of the data, not on what we
|
||||
might think of as unusual based on human experience. So, if events typically occur
|
||||
between 3 a.m. and 5 a.m., an event occurring at 3 p.m. is flagged as unusual.
|
||||
* When Daylight Saving Time starts or stops, regular events can be flagged as anomalous.
|
||||
This situation occurs because the actual time of the event (as measured against a UTC
|
||||
baseline) has changed. This situation is treated as a step change in behavior and the new
|
||||
times will be learned quickly.
|
||||
====
|
||||
|
||||
[float]
|
||||
[[ml-time-of-day]]
|
||||
==== Time_of_day
|
||||
|
||||
The `time_of_day` function detects when events occur that are outside normal
|
||||
usage patterns. For example, it detects unusual activity in the middle of the
|
||||
night.
|
||||
|
||||
The function expects daily behavior to be similar. If you expect the behavior of
|
||||
your data to differ on Saturdays compared to Wednesdays, the `time_of_week`
|
||||
function is more appropriate.
|
||||
|
||||
This function supports the following properties:
|
||||
|
||||
* `by_field_name` (optional)
|
||||
* `over_field_name` (optional)
|
||||
* `partition_field_name` (optional)
|
||||
|
||||
For more information about those properties, see
|
||||
{ref}/ml-job-resource.html#ml-detectorconfig[Detector Configuration Objects].
|
||||
|
||||
.Example 1: Analyzing events with the time_of_day function
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
{
|
||||
"function" : "time_of_day",
|
||||
"by_field_name" : "process"
|
||||
}
|
||||
--------------------------------------------------
|
||||
|
||||
If you use this `time_of_day` function in a detector in your job, it
|
||||
models when events occur throughout a day for each process. It detects when an
|
||||
event occurs for a process that is at an unusual time in the day compared to
|
||||
its past behavior.
|
||||
|
||||
[float]
|
||||
[[ml-time-of-week]]
|
||||
==== Time_of_week
|
||||
|
||||
The `time_of_week` function detects when events occur that are outside normal
|
||||
usage patterns. For example, it detects login events on the weekend.
|
||||
|
||||
This function supports the following properties:
|
||||
|
||||
* `by_field_name` (optional)
|
||||
* `over_field_name` (optional)
|
||||
* `partition_field_name` (optional)
|
||||
|
||||
For more information about those properties, see
|
||||
{ref}/ml-job-resource.html#ml-detectorconfig[Detector Configuration Objects].
|
||||
|
||||
.Example 2: Analyzing events with the time_of_week function
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
{
|
||||
"function" : "time_of_week",
|
||||
"by_field_name" : "eventcode",
|
||||
"over_field_name" : "workstation"
|
||||
}
|
||||
--------------------------------------------------
|
||||
|
||||
If you use this `time_of_week` function in a detector in your job, it
|
||||
models when events occur throughout the week for each `eventcode`. It detects
|
||||
when a workstation event occurs at an unusual time during the week for that
|
||||
`eventcode` compared to other workstations. It detects events for a
|
||||
particular workstation that are outside the normal usage pattern.
|
|
@ -0,0 +1,210 @@
|
|||
[[ml-gs-data]]
|
||||
=== Identifying Data for Analysis
|
||||
|
||||
For the purposes of this tutorial, we provide sample data that you can play with
|
||||
and search in {es}. When you consider your own data, however, it's important to
|
||||
take a moment and think about where the {xpackml} features will be most
|
||||
impactful.
|
||||
|
||||
The first consideration is that it must be time series data. The {ml} features
|
||||
are designed to model and detect anomalies in time series data.
|
||||
|
||||
The second consideration, especially when you are first learning to use {ml},
|
||||
is the importance of the data and how familiar you are with it. Ideally, it is
|
||||
information that contains key performance indicators (KPIs) for the health,
|
||||
security, or success of your business or system. It is information that you need
|
||||
to monitor and act on when anomalous behavior occurs. You might even have {kib}
|
||||
dashboards that you're already using to watch this data. The better you know the
|
||||
data, the quicker you will be able to create {ml} jobs that generate useful
|
||||
insights.
|
||||
|
||||
The final consideration is where the data is located. This tutorial assumes that
|
||||
your data is stored in {es}. It guides you through the steps required to create
|
||||
a _{dfeed}_ that passes data to a job. If your own data is outside of {es},
|
||||
analysis is still possible by using the post data API.
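
For example, the following sketch sends two records directly to a hypothetical job named `my_job` by using the post data API (the job name and field values are illustrative only):

[source,js]
----------------------------------
POST _xpack/ml/anomaly_detectors/my_job/_data
{"@timestamp":"2017-03-23T13:00:00","total":40476}
{"@timestamp":"2017-03-23T13:10:00","total":39122}
----------------------------------
// NOTCONSOLE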
|
||||
|
||||
IMPORTANT: If you want to create {ml} jobs in {kib}, you must use {dfeeds}.
|
||||
That is to say, you must store your input data in {es}. When you create
|
||||
a job, you select an existing index pattern and {kib} configures the {dfeed}
|
||||
for you under the covers.
|
||||
|
||||
|
||||
[float]
|
||||
[[ml-gs-sampledata]]
|
||||
==== Obtaining a Sample Data Set
|
||||
|
||||
In this step we will upload some sample data to {es}. This is standard
|
||||
{es} functionality, and is needed to set the stage for using {ml}.
|
||||
|
||||
The sample data for this tutorial contains information about the requests that
|
||||
are received by various applications and services in a system. A system
|
||||
administrator might use this type of information to track the total number of
|
||||
requests across all of the infrastructure. If the number of requests increases
|
||||
or decreases unexpectedly, for example, this might be an indication that there
|
||||
is a problem or that resources need to be redistributed. By using the {xpack}
|
||||
{ml} features to model the behavior of this data, it is easier to identify
|
||||
anomalies and take appropriate action.
|
||||
|
||||
Download this sample data by clicking here:
|
||||
https://download.elastic.co/demos/machine_learning/gettingstarted/server_metrics.tar.gz[server_metrics.tar.gz]
|
||||
|
||||
Use the following commands to extract the files:
|
||||
|
||||
[source,sh]
|
||||
----------------------------------
|
||||
tar -zxvf server_metrics.tar.gz
|
||||
----------------------------------
|
||||
|
||||
Each document in the server-metrics data set has the following schema:
|
||||
|
||||
[source,js]
|
||||
----------------------------------
|
||||
{
|
||||
"index":
|
||||
{
|
||||
"_index":"server-metrics",
|
||||
"_type":"metric",
|
||||
"_id":"1177"
|
||||
}
|
||||
}
|
||||
{
|
||||
"@timestamp":"2017-03-23T13:00:00",
|
||||
"accept":36320,
|
||||
"deny":4156,
|
||||
"host":"server_2",
|
||||
"response":2.4558210155,
|
||||
"service":"app_3",
|
||||
"total":40476
|
||||
}
|
||||
----------------------------------
|
||||
// NOTCONSOLE
|
||||
|
||||
TIP: The sample data sets include summarized data. For example, the `total`
|
||||
value is a sum of the requests that were received by a specific service at a
|
||||
particular time. If your data is stored in {es}, you can generate
|
||||
this type of sum or average by using aggregations. One of the benefits of
|
||||
summarizing data this way is that {es} automatically distributes
|
||||
these calculations across your cluster. You can then feed this summarized data
|
||||
into {xpackml} instead of raw results, which reduces the volume
|
||||
of data that must be considered while detecting anomalies. For the purposes of
|
||||
this tutorial, however, these summary values are stored in {es}. For more
|
||||
information, see <<ml-configuring-aggregation>>.
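
As an illustration of the kind of aggregation mentioned in the tip above, the following sketch computes a 10 minute sum from raw request documents. The `requests` index and `count` field are hypothetical; substitute the names used in your own data:

[source,js]
----------------------------------
GET requests/_search
{
  "size": 0,
  "aggs": {
    "requests_over_time": {
      "date_histogram": { "field": "@timestamp", "interval": "10m" },
      "aggs": {
        "total": { "sum": { "field": "count" } }
      }
    }
  }
}
----------------------------------
// NOTCONSOLE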
|
||||
|
||||
Before you load the data set, you need to set up {ref}/mapping.html[_mappings_]
|
||||
for the fields. Mappings divide the documents in the index into logical groups
|
||||
and specify a field's characteristics, such as the field's searchability or
|
||||
whether or not it's _tokenized_, or broken up into separate words.
|
||||
|
||||
The sample data includes an `upload_server-metrics.sh` script, which you can use
|
||||
to create the mappings and load the data set. You can download it by clicking
|
||||
here: https://download.elastic.co/demos/machine_learning/gettingstarted/upload_server-metrics.sh[upload_server-metrics.sh]
|
||||
Before you run it, however, you must edit the USERNAME and PASSWORD variables
|
||||
with your actual user ID and password.
|
||||
|
||||
The script runs a command similar to the following example, which sets up a
|
||||
mapping for the data set:
|
||||
|
||||
[source,sh]
|
||||
----------------------------------
|
||||
curl -u elastic:x-pack-test-password -X PUT -H 'Content-Type: application/json'
|
||||
http://localhost:9200/server-metrics -d '{
|
||||
"settings":{
|
||||
"number_of_shards":1,
|
||||
"number_of_replicas":0
|
||||
},
|
||||
"mappings":{
|
||||
"metric":{
|
||||
"properties":{
|
||||
"@timestamp":{
|
||||
"type":"date"
|
||||
},
|
||||
"accept":{
|
||||
"type":"long"
|
||||
},
|
||||
"deny":{
|
||||
"type":"long"
|
||||
},
|
||||
"host":{
|
||||
"type":"keyword"
|
||||
},
|
||||
"response":{
|
||||
"type":"float"
|
||||
},
|
||||
"service":{
|
||||
"type":"keyword"
|
||||
},
|
||||
"total":{
|
||||
"type":"long"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}'
|
||||
----------------------------------
|
||||
// NOTCONSOLE
|
||||
|
||||
NOTE: If you run this command, you must replace `x-pack-test-password` with your
|
||||
actual password.
|
||||
|
||||
You can then use the {es} `bulk` API to load the data set. The
|
||||
`upload_server-metrics.sh` script runs commands similar to the following
|
||||
example, which loads the four JSON files:
|
||||
|
||||
[source,sh]
|
||||
----------------------------------
|
||||
curl -u elastic:x-pack-test-password -X POST -H "Content-Type: application/json"
|
||||
http://localhost:9200/server-metrics/_bulk --data-binary "@server-metrics_1.json"
|
||||
|
||||
curl -u elastic:x-pack-test-password -X POST -H "Content-Type: application/json"
|
||||
http://localhost:9200/server-metrics/_bulk --data-binary "@server-metrics_2.json"
|
||||
|
||||
curl -u elastic:x-pack-test-password -X POST -H "Content-Type: application/json"
|
||||
http://localhost:9200/server-metrics/_bulk --data-binary "@server-metrics_3.json"
|
||||
|
||||
curl -u elastic:x-pack-test-password -X POST -H "Content-Type: application/json"
|
||||
http://localhost:9200/server-metrics/_bulk --data-binary "@server-metrics_4.json"
|
||||
----------------------------------
|
||||
// NOTCONSOLE
|
||||
|
||||
TIP: This uploads 200MB of data. The data is split into four files because there is a
|
||||
maximum 100MB limit when using the `_bulk` API.
|
||||
|
||||
These commands might take some time to run, depending on the computing resources
|
||||
available.
|
||||
|
||||
You can verify that the data was loaded successfully with the following command:
|
||||
|
||||
[source,sh]
|
||||
----------------------------------
|
||||
curl 'http://localhost:9200/_cat/indices?v' -u elastic:x-pack-test-password
|
||||
----------------------------------
|
||||
// NOTCONSOLE
|
||||
|
||||
You should see output similar to the following:
|
||||
|
||||
[source,txt]
|
||||
----------------------------------
|
||||
health status index ... pri rep docs.count ...
|
||||
green open server-metrics ... 1 0 905940 ...
|
||||
----------------------------------
|
||||
// NOTCONSOLE
|
||||
|
||||
Next, you must define an index pattern for this data set:
|
||||
|
||||
. Open {kib} in your web browser and log in. If you are running {kib}
|
||||
locally, go to `http://localhost:5601/`.
|
||||
|
||||
. Click the **Management** tab, then **{kib}** > **Index Patterns**.
|
||||
|
||||
. If you already have index patterns, click **Create Index** to define a new
|
||||
one. Otherwise, the **Create index pattern** wizard is already open.
|
||||
|
||||
. For this tutorial, any pattern that matches the name of the index you've
|
||||
loaded will work. For example, enter `server-metrics*` as the index pattern.
|
||||
|
||||
. In the **Configure settings** step, select the `@timestamp` field in the
|
||||
**Time Filter field name** list.
|
||||
|
||||
. Click **Create index pattern**.
|
||||
|
||||
This data set can now be analyzed in {ml} jobs in {kib}.
|
|
@ -0,0 +1,76 @@
|
|||
[[ml-gs-forecast]]
|
||||
=== Creating Forecasts
|
||||
|
||||
In addition to detecting anomalous behavior in your data, you can use
|
||||
{ml} to predict future behavior. For more information, see <<ml-forecasting>>.
|
||||
|
||||
To create a forecast in {kib}:
|
||||
|
||||
. Go to the **Single Metric Viewer** and select one of the jobs that you created
|
||||
in this tutorial. For example, select the `total-requests` job.
|
||||
|
||||
. Click **Forecast**. +
|
||||
+
|
||||
--
|
||||
[role="screenshot"]
|
||||
image::images/ml-gs-forecast.jpg["Create a forecast from the Single Metric Viewer"]
|
||||
--
|
||||
|
||||
. Specify a duration for your forecast. This value indicates how far to
|
||||
extrapolate beyond the last record that was processed. You must use time units,
|
||||
such as `30d` for 30 days. For more information, see
|
||||
{ref}/common-options.html#time-units[Time Units]. In this example, we use a
|
||||
duration of 1 week: +
|
||||
+
|
||||
--
|
||||
[role="screenshot"]
|
||||
image::images/ml-gs-duration.jpg["Specify a duration of 1w"]
|
||||
--
|
||||
|
||||
. View the forecast in the **Single Metric Viewer**: +
|
||||
+
|
||||
--
|
||||
[role="screenshot"]
|
||||
image::images/ml-gs-forecast-results.jpg["View a forecast from the Single Metric Viewer"]
|
||||
|
||||
The yellow line in the chart represents the predicted data values. The shaded
|
||||
yellow area represents the bounds for the predicted values, which also gives an
|
||||
indication of the confidence of the predictions. Note that the bounds generally
|
||||
increase with time (that is to say, the confidence levels decrease), since you
|
||||
are forecasting further into the future. Eventually if the confidence levels are
|
||||
too low, the forecast stops.
|
||||
--
|
||||
|
||||
. Optional: Compare the forecast to actual data. +
|
||||
+
|
||||
--
|
||||
You can try this with the sample data by choosing a subset of the data when you
|
||||
create the job, as described in <<ml-gs-jobs>>. Create the forecast, then process
|
||||
the remaining data, as described in <<ml-gs-job1-datafeed>>.
|
||||
--
|
||||
|
||||
.. After you restart the {dfeed}, re-open the forecast by selecting the job in
|
||||
the **Single Metric Viewer**, clicking **Forecast**, and selecting your forecast
|
||||
from the list. For example: +
|
||||
+
|
||||
--
|
||||
[role="screenshot"]
|
||||
image::images/ml-gs-forecast-open.jpg["Open a forecast in the Single Metric Viewer"]
|
||||
--
|
||||
|
||||
.. View the forecast and actual data in the **Single Metric Viewer**: +
|
||||
+
|
||||
--
|
||||
[role="screenshot"]
|
||||
image::images/ml-gs-forecast-actual.jpg["View a forecast over actual data in the Single Metric Viewer"]
|
||||
|
||||
The chart contains the actual data values, the bounds for the expected values,
|
||||
the anomalies, the forecast data values, and the bounds for the forecast. This
|
||||
combination of actual and forecast data gives you an indication of how well the
|
||||
{xpack} {ml} features can extrapolate the future behavior of the data.
|
||||
--
|
||||
|
||||
Now that you have seen how easy it is to create forecasts with the sample data,
|
||||
consider what type of events you might want to predict in your own data. For
|
||||
more information and ideas, as well as a list of limitations related to
|
||||
forecasts, see <<ml-forecasting>>.
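
If you prefer to work with the APIs, you can create an equivalent forecast with the forecast API. The following sketch assumes the `total-requests` job from this tutorial and a one week duration:

[source,js]
--------------------------------------------------
POST _xpack/ml/anomaly_detectors/total-requests/_forecast
{
  "duration": "7d"
}
--------------------------------------------------
// NOTCONSOLE

The response contains a `forecast_id`, which identifies the forecast when you re-open it in the **Single Metric Viewer**.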
|
|
@ -0,0 +1,211 @@
|
|||
[[ml-gs-multi-jobs]]
|
||||
=== Creating Multi-metric Jobs
|
||||
|
||||
The multi-metric job wizard in {kib} provides a simple way to create more
|
||||
complex jobs with multiple detectors. For example, in the single metric job, you
|
||||
were tracking total requests versus time. You might also want to track other
|
||||
metrics like average response time or the maximum number of denied requests.
|
||||
Instead of creating jobs for each of those metrics, you can combine them in a
|
||||
multi-metric job.
|
||||
|
||||
You can also use multi-metric jobs to split a single time series into multiple
|
||||
time series based on a categorical field. For example, you can split the data
|
||||
based on its hostnames, locations, or users. Each time series is modeled
|
||||
independently. By looking at temporal patterns on a per entity basis, you might
|
||||
spot things that might have otherwise been hidden in the lumped view.
|
||||
|
||||
Conceptually, you can think of this as running many independent single metric
|
||||
jobs. By bundling them together in a multi-metric job, however, you can see an
|
||||
overall score and shared influencers for all the metrics and all the entities in
|
||||
the job. Multi-metric jobs therefore scale better than having many independent
|
||||
single metric jobs and provide better results when you have influencers that are
|
||||
shared across the detectors.
|
||||
|
||||
The sample data for this tutorial contains information about the requests that
|
||||
are received by various applications and services in a system. Let's assume that
|
||||
you want to monitor the requests received and the response time. In particular,
|
||||
you might want to track those metrics on a per service basis to see if any
|
||||
services have unusual patterns.
|
||||
|
||||
To create a multi-metric job in {kib}:
|
||||
|
||||
. Open {kib} in your web browser and log in. If you are running {kib} locally,
|
||||
go to `http://localhost:5601/`.
|
||||
|
||||
. Click **Machine Learning** in the side navigation, then click **Create new job**.
|
||||
|
||||
. Select the index pattern that you created for the sample data. For example,
|
||||
`server-metrics*`.
|
||||
|
||||
. In the **Use a wizard** section, click **Multi metric**.
|
||||
|
||||
. Configure the job by providing the following job settings: +
|
||||
+
|
||||
--
|
||||
[role="screenshot"]
|
||||
image::images/ml-gs-multi-job.jpg["Create a new job from the server-metrics index"]
|
||||
--
|
||||
|
||||
.. For the **Fields**, select `high mean(response)` and `sum(total)`. This
|
||||
creates two detectors and specifies the analysis function and field that each
|
||||
detector uses. The first detector uses the high mean function to detect
|
||||
unusually high average values for the `response` field in each bucket. The
|
||||
second detector uses the sum function to detect when the sum of the `total`
|
||||
field is anomalous in each bucket. For more information about any of the
|
||||
analytical functions, see <<ml-functions>>.
|
||||
|
||||
.. For the **Bucket span**, enter `10m`. This value specifies the size of the
|
||||
interval that the analysis is aggregated into. As was the case in the single
|
||||
metric example, this value has a significant impact on the analysis. When you're
|
||||
creating jobs for your own data, you might need to experiment with different
|
||||
bucket spans depending on the frequency of the input data, the duration of
|
||||
typical anomalies, and the frequency at which alerting is required.
|
||||
|
||||
.. For the **Split Data**, select `service`. When you specify this
|
||||
option, the analysis is segmented such that you have completely independent
|
||||
baselines for each distinct value of this field.
|
||||
//TBD: What is the importance of having separate baselines?
|
||||
There are seven unique service keyword values in the sample data. Thus for each
|
||||
of the seven services, you will see the high mean response metrics and sum
|
||||
total metrics. +
|
||||
+
|
||||
--
|
||||
NOTE: If you are creating a job by using the {ml} APIs or the advanced job
|
||||
wizard in {kib}, you can accomplish this split by using the
|
||||
`partition_field_name` property.
|
||||
|
||||
--
|
||||
|
||||
.. For the **Key Fields (Influencers)**, select `host`. Note that the `service` field
|
||||
is also automatically selected because you used it to split the data. These key
|
||||
fields are also known as _influencers_.
|
||||
When you identify a field as an influencer, you are indicating that you think
|
||||
it contains information about someone or something that influences or
|
||||
contributes to anomalies.
|
||||
+
|
||||
--
|
||||
[TIP]
|
||||
========================
|
||||
Picking an influencer is strongly recommended for the following reasons:
|
||||
|
||||
* It allows you to more easily assign blame for the anomaly
|
||||
* It simplifies and aggregates the results
|
||||
|
||||
The best influencer is the person or thing that you want to blame for the
|
||||
anomaly. In many cases, users or client IP addresses make excellent influencers.
|
||||
Influencers can be any field in your data; they do not need to be fields that
|
||||
are specified in your detectors, though they often are.
|
||||
|
||||
As a best practice, do not pick too many influencers. For example, you generally
|
||||
do not need more than three. If you pick many influencers, the results can be
|
||||
overwhelming and there is a small overhead to the analysis.
|
||||
|
||||
========================
|
||||
//TBD: Is this something you can determine later from looking at results and
|
||||
//update your job with if necessary? Is it all post-processing or does it affect
|
||||
//the ongoing modeling?
|
||||
--
|
||||
|
||||
. Click **Use full server-metrics* data**. Two graphs are generated for each
|
||||
`service` value, which represent the high mean `response` values and
|
||||
sum `total` values over time. For example:
|
||||
+
|
||||
--
|
||||
[role="screenshot"]
|
||||
image::images/ml-gs-job2-split.jpg["Kibana charts for data split by service"]
|
||||
--
|
||||
|
||||
. Provide a name for the job, for example `response_requests_by_app`. The job
|
||||
name must be unique in your cluster. You can also optionally provide a
|
||||
description of the job.
|
||||
|
||||
. Click **Create Job**.
|
||||
|
||||
When the job is created, you can choose to view the results, continue the job in
|
||||
real-time, and create a watch. In this tutorial, we will proceed to view the
|
||||
results.
|
||||
|
||||
TIP: The `create_multi_metric.sh` script creates a similar job and {dfeed} by
|
||||
using the {ml} APIs. You can download that script by clicking
|
||||
here: https://download.elastic.co/demos/machine_learning/gettingstarted/create_multi_metric.sh[create_multi_metric.sh]
|
||||
For API reference information, see {ref}/ml-apis.html[Machine Learning APIs].
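
As a rough sketch of what such an API-created job might look like, the following request defines two detectors that are split on `service` and uses `service` and `host` as influencers. The exact configuration produced by the script is not shown here and might differ:

[source,js]
--------------------------------------------------
PUT _xpack/ml/anomaly_detectors/response_requests_by_app
{
  "analysis_config": {
    "bucket_span": "10m",
    "detectors": [
      { "function": "high_mean", "field_name": "response", "partition_field_name": "service" },
      { "function": "sum", "field_name": "total", "partition_field_name": "service" }
    ],
    "influencers": [ "service", "host" ]
  },
  "data_description": {
    "time_field": "@timestamp"
  }
}
--------------------------------------------------
// NOTCONSOLE

A {dfeed} that pulls from the `server-metrics*` indices would then be created and started separately.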
|
||||
|
||||
[[ml-gs-job2-analyze]]
|
||||
=== Exploring Multi-metric Job Results
|
||||
|
||||
The {xpackml} features analyze the input stream of data, model its behavior, and
|
||||
perform analysis based on the two detectors you defined in your job. When an
|
||||
event occurs outside of the model, that event is identified as an anomaly.
|
||||
|
||||
You can use the **Anomaly Explorer** in {kib} to view the analysis results:
|
||||
|
||||
[role="screenshot"]
|
||||
image::images/ml-gs-job2-explorer.jpg["Job results in the Anomaly Explorer"]
|
||||
|
||||
You can explore the overall anomaly time line, which shows the maximum anomaly
|
||||
score for each section in the specified time period. You can change the time
|
||||
period by using the time picker in the {kib} toolbar. Note that the sections in
|
||||
this time line do not necessarily correspond to the bucket span. If you change
|
||||
the time period, the sections change size too. The smallest possible size for
|
||||
these sections is a bucket. If you specify a large time period, the sections can
|
||||
span many buckets.
|
||||
|
||||
On the left is a list of the top influencers for all of the detected anomalies
|
||||
in that same time period. The list includes maximum anomaly scores, which in
|
||||
this case are aggregated for each influencer, for each bucket, across all
|
||||
detectors. There is also a total sum of the anomaly scores for each influencer.
|
||||
You can use this list to help you narrow down the contributing factors and focus
|
||||
on the most anomalous entities.
|
||||
|
||||
If your job contains influencers, you can also explore swim lanes that
|
||||
correspond to the values of an influencer. In this example, the swim lanes
|
||||
correspond to the values for the `service` field that you used to split the data.
|
||||
Each lane represents a unique application or service name. Since you specified
|
||||
the `host` field as an influencer, you can also optionally view the results in
|
||||
swim lanes for each host name:
|
||||
|
||||
[role="screenshot"]
|
||||
image::images/ml-gs-job2-explorer-host.jpg["Job results sorted by host"]
|
||||
|
||||
By default, the swim lanes are ordered by their maximum anomaly score values.
|
||||
You can click on the sections in the swim lane to see details about the
|
||||
anomalies that occurred in that time interval.
|
||||
|
||||
NOTE: The anomaly scores that you see in each section of the **Anomaly Explorer**
|
||||
might differ slightly. This disparity occurs because for each job we generate
|
||||
bucket results, influencer results, and record results. Anomaly scores are
|
||||
generated for each type of result. The anomaly timeline uses the bucket-level
|
||||
anomaly scores. The list of top influencers uses the influencer-level anomaly
|
||||
scores. The list of anomalies uses the record-level anomaly scores. For more
|
||||
information about these different result types, see
|
||||
{ref}/ml-results-resource.html[Results Resources].
|
||||
|
||||
Click on a section in the swim lanes to obtain more information about the
|
||||
anomalies in that time period. For example, click on the red section in the swim
|
||||
lane for `server_2`:
|
||||
|
||||
[role="screenshot"]
|
||||
image::images/ml-gs-job2-explorer-anomaly.jpg["Job results for an anomaly"]
|
||||
|
||||
You can see exact times when anomalies occurred and which detectors or metrics
|
||||
caught the anomaly. Also note that because you split the data by the `service`
|
||||
field, you see separate charts for each applicable service. In particular, you
|
||||
see charts for each service for which there is data on the specified host in the
|
||||
specified time interval.
|
||||
|
||||
Below the charts, there is a table that provides more information, such as the
|
||||
typical and actual values and the influencers that contributed to the anomaly.
|
||||
|
||||
[role="screenshot"]
|
||||
image::images/ml-gs-job2-explorer-table.jpg["Job results table"]
|
||||
|
||||
Notice that there are anomalies for both detectors, that is to say for both the
|
||||
`high_mean(response)` and the `sum(total)` metrics in this time interval. The
|
||||
table aggregates the anomalies to show the highest severity anomaly per detector
|
||||
and entity, which is the by, over, or partition field value that is displayed
|
||||
in the **found for** column. To view all the anomalies without any aggregation,
|
||||
set the **Interval** to `Show all`.
|
||||
|
||||
By
|
||||
investigating multiple metrics in a single job, you might see relationships
|
||||
between events in your data that would otherwise be overlooked.
|
|
@ -0,0 +1,55 @@
|
|||
[[ml-gs-next]]
|
||||
=== Next Steps
|
||||
|
||||
By completing this tutorial, you've learned how you can detect anomalous
|
||||
behavior in a simple set of sample data. You created single and multi-metric
|
||||
jobs in {kib}, which creates and opens jobs and creates and starts {dfeeds} for
|
||||
you under the covers. You examined the results of the {ml} analysis in the
|
||||
**Single Metric Viewer** and **Anomaly Explorer** in {kib}. You also
|
||||
extrapolated the future behavior of a job by creating a forecast.
|
||||
|
||||
If you want to learn about advanced job options, you might be interested in
|
||||
the following video tutorial:
|
||||
https://www.elastic.co/videos/machine-learning-lab-3-detect-outliers-in-a-population[Machine Learning Lab 3 - Detect Outliers in a Population].
|
||||
|
||||
If you intend to use {ml} APIs in your applications, a good next step might be
|
||||
to learn about the APIs by retrieving information about these sample jobs.
|
||||
For example, the following APIs retrieve information about the jobs and {dfeeds}.
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
GET _xpack/ml/anomaly_detectors
|
||||
|
||||
GET _xpack/ml/datafeeds
|
||||
--------------------------------------------------
|
||||
// CONSOLE
|
||||
|
||||
For more information about the {ml} APIs, see <<ml-api-quickref>>.
|
||||
|
||||
Ultimately, the next step is to start applying {ml} to your own data.
|
||||
As mentioned in <<ml-gs-data>>, there are three things to consider when you're
|
||||
thinking about where {ml} will be most impactful:
|
||||
|
||||
. It must be time series data.
|
||||
. It should be information that contains key performance indicators for the
|
||||
health, security, or success of your business or system. The better you know the
|
||||
data, the quicker you will be able to create jobs that generate useful
|
||||
insights.
|
||||
. Ideally, the data is located in {es} and you can therefore create a {dfeed}
|
||||
that retrieves data in real time. If your data is outside of {es}, you
|
||||
cannot use {kib} to create your jobs and you cannot use {dfeeds}. Machine
|
||||
learning analysis is still possible, however, by using APIs to create and manage
|
||||
jobs and to post data to them.
|
||||
|
||||
Once you have decided which data to analyze, you can start considering which
|
||||
analysis functions you want to use. For more information, see <<ml-functions>>.
|
||||
|
||||
In general, it is a good idea to start with single metric jobs for your
|
||||
key performance indicators. After you examine these simple analysis results,
|
||||
you will have a better idea of what the influencers might be. You can create
|
||||
multi-metric jobs and split the data or create more complex analysis functions
|
||||
as necessary. For examples of more complicated configuration options, see
|
||||
<<ml-configuring>>.
|
||||
|
||||
If you encounter problems, we're here to help. See <<xpack-help>> and
|
||||
<<ml-troubleshooting>>.
|
|
@ -0,0 +1,331 @@
|
|||
[[ml-gs-jobs]]
|
||||
=== Creating Single Metric Jobs
|
||||
|
||||
At this point in the tutorial, the goal is to detect anomalies in the
|
||||
total requests received by your applications and services. The sample data
|
||||
contains a single key performance indicator (KPI) to track this, which is the total
|
||||
requests over time. It is therefore logical to start by creating a single metric
|
||||
job for this KPI.
|
||||
|
||||
TIP: If you are using aggregated data, you can create an advanced job
|
||||
and configure it to use a `summary_count_field_name`. The {ml} algorithms will
|
||||
make the best possible use of summarized data in this case. For simplicity, in
|
||||
this tutorial we will not make use of that advanced functionality. For more
|
||||
information, see <<ml-configuring-aggregation>>.
|
||||
|
||||
A single metric job contains a single _detector_. A detector defines the type of
|
||||
analysis that will occur (for example, `max`, `average`, or `rare` analytical
|
||||
functions) and the fields that will be analyzed.
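
For example, the detector that you build through the wizard in this tutorial could be expressed as the following snippet. This is a sketch only; the wizard assembles the full job configuration for you:

[source,js]
--------------------------------------------------
{
  "function": "sum",
  "field_name": "total"
}
--------------------------------------------------
// NOTCONSOLE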
|
||||
|
||||
To create a single metric job in {kib}:
|
||||
|
||||
. Open {kib} in your web browser and log in. If you are running {kib} locally,
|
||||
go to `http://localhost:5601/`.
|
||||
|
||||
. Click **Machine Learning** in the side navigation.
|
||||
|
||||
. Click **Create new job**.
|
||||
|
||||
. Select the index pattern that you created for the sample data. For example,
|
||||
`server-metrics*`.
|
||||
|
||||
. In the **Use a wizard** section, click **Single metric**.
|
||||
|
||||
. Configure the job by providing the following information: +
|
||||
+
|
||||
--
|
||||
[role="screenshot"]
|
||||
image::images/ml-gs-single-job.jpg["Create a new job from the server-metrics index"]
|
||||
--
|
||||
|
||||
.. For the **Aggregation**, select `Sum`. This value specifies the analysis
|
||||
function that is used.
|
||||
+
|
||||
--
|
||||
Some of the analytical functions look for single anomalous data points. For
|
||||
example, `max` identifies the maximum value that is seen within a bucket.
|
||||
Others perform some aggregation over the length of the bucket. For example,
|
||||
`mean` calculates the mean of all the data points seen within the bucket.
|
||||
Similarly, `count` calculates the total number of data points within the bucket.
|
||||
In this tutorial, you are using the `sum` function, which calculates the sum of
|
||||
the specified field's values within the bucket. For descriptions of all the
|
||||
functions, see <<ml-functions>>.
|
||||
--
|
||||
|
||||
.. For the **Field**, select `total`. This value specifies the field that
|
||||
the detector uses in the function.
|
||||
+
|
||||
--
|
||||
NOTE: Some functions such as `count` and `rare` do not require fields.
|
||||
--
|
||||
|
||||
.. For the **Bucket span**, enter `10m`. This value specifies the size of the
|
||||
interval that the analysis is aggregated into.
|
||||
+
|
||||
--
|
||||
The {xpackml} features use the concept of a bucket to divide up the time series
|
||||
into batches for processing. For example, if you are monitoring
|
||||
the total number of requests in the system,
|
||||
using a bucket span of 1 hour would mean that at the end of each hour, it
|
||||
calculates the sum of the requests for the last hour and computes the
|
||||
anomalousness of that value compared to previous hours.
|
||||
|
||||
The bucket span has two purposes: it dictates over what time span to look for
|
||||
anomalous features in data, and also determines how quickly anomalies can be
|
||||
detected. Choosing a shorter bucket span enables anomalies to be detected more
|
||||
quickly. However, there is a risk of being too sensitive to natural variations
|
||||
or noise in the input data. Choosing too long a bucket span can mean that
|
||||
interesting anomalies are averaged away. There is also the possibility that the
|
||||
aggregation might smooth out some anomalies based on when the bucket starts
|
||||
in time.
|
||||
|
||||
The bucket span has a significant impact on the analysis. When you're trying to
|
||||
determine what value to use, take into account the granularity at which you
|
||||
want to perform the analysis, the frequency of the input data, the duration of
|
||||
typical anomalies, and the frequency at which alerting is required.
|
||||
--
|
||||
|
||||
. Determine whether you want to process all of the data or only part of it. If
|
||||
you want to analyze all of the existing data, click
|
||||
**Use full server-metrics* data**. If you want to see what happens when you
|
||||
stop and start {dfeeds} and process additional data over time, click the time
|
||||
picker in the {kib} toolbar. Since the sample data spans a period of time
|
||||
between March 23, 2017 and April 22, 2017, click **Absolute**. Set the start
|
||||
time to March 23, 2017 and the end time to April 1, 2017, for example. Once
|
||||
you've got the time range set up, click the **Go** button. +
|
||||
+
|
||||
--
|
||||
[role="screenshot"]
|
||||
image::images/ml-gs-job1-time.jpg["Setting the time range for the {dfeed}"]
|
||||
--
|
||||
+
|
||||
--
|
||||
A graph is generated, which represents the total number of requests over time.
|
||||
|
||||
Note that the **Estimate bucket span** option is no longer greyed out in the
|
||||
**Bucket span** field. This is an experimental feature that you can use to help
|
||||
determine an appropriate bucket span for your data. For the purposes of this
|
||||
tutorial, we will leave the bucket span at 10 minutes.
|
||||
--
|
||||
|
||||
. Provide a name for the job, for example `total-requests`. The job name must
|
||||
be unique in your cluster. You can also optionally provide a description of the
|
||||
job and create a job group.
|
||||
|
||||
. Click **Create Job**. +
|
||||
+
|
||||
--
|
||||
[role="screenshot"]
|
||||
image::images/ml-gs-job1.jpg["A graph of the total number of requests over time"]
|
||||
--
|
||||
|
||||
As the job is created, the graph is updated to give a visual representation of
|
||||
the progress of {ml} as the data is processed. This view is only available whilst the
|
||||
job is running.
|
||||
|
||||
When the job is created, you can choose to view the results, continue the job
|
||||
in real-time, and create a watch. In this tutorial, we will look at how to
|
||||
manage jobs and {dfeeds} before we view the results.
|
||||
|
||||
TIP: The `create_single_metric.sh` script creates a similar job and {dfeed} by
|
||||
using the {ml} APIs. You can download that script by clicking
|
||||
here: https://download.elastic.co/demos/machine_learning/gettingstarted/create_single_metric.sh[create_single_metric.sh]
|
||||
For API reference information, see {ref}/ml-apis.html[Machine Learning APIs].
|
||||
|
||||
[[ml-gs-job1-manage]]
|
||||
=== Managing Jobs
|
||||
|
||||
After you create a job, you can see its status in the **Job Management** tab: +
|
||||
|
||||
[role="screenshot"]
|
||||
image::images/ml-gs-job1-manage1.jpg["Status information for the total-requests job"]
|
||||
|
||||
The following information is provided for each job:
|
||||
|
||||
Job ID::
|
||||
The unique identifier for the job.
|
||||
|
||||
Description::
|
||||
The optional description of the job.
|
||||
|
||||
Processed records::
|
||||
The number of records that have been processed by the job.
|
||||
|
||||
Memory status::
|
||||
The status of the mathematical models. When you create jobs by using the APIs or
|
||||
by using the advanced options in {kib}, you can specify a `model_memory_limit`.
|
||||
That value is the maximum amount of memory resources that the mathematical
|
||||
models can use. Once that limit is approached, data pruning becomes more
|
||||
aggressive. Upon exceeding that limit, new entities are not modeled. For more
|
||||
information about this setting, see
|
||||
{ref}/ml-job-resource.html#ml-apilimits[Analysis Limits]. The memory status
|
||||
field reflects whether you have reached or exceeded the model memory limit. It
|
||||
can have one of the following values: +
|
||||
`ok`::: The models stayed below the configured value.
|
||||
`soft_limit`::: The models used more than 60% of the configured memory limit
|
||||
and older unused models are pruned to free up space.
|
||||
`hard_limit`::: The models used more space than the configured memory limit.
|
||||
As a result, not all incoming data was processed.
|
||||
|
||||
Job state::
|
||||
The status of the job, which can be one of the following values: +
|
||||
`opened`::: The job is available to receive and process data.
|
||||
`closed`::: The job finished successfully with its model state persisted.
|
||||
The job must be opened before it can accept further data.
|
||||
`closing`::: The job close action is in progress and has not yet completed.
|
||||
A closing job cannot accept further data.
|
||||
`failed`::: The job did not finish successfully due to an error.
|
||||
This situation can occur due to invalid input data.
|
||||
If the job has failed irrevocably, it must be force closed and then deleted.
|
||||
If the {dfeed} can be corrected, the job can be closed and then re-opened.
|
||||
|
||||
{dfeed-cap} state::
|
||||
The status of the {dfeed}, which can be one of the following values: +
|
||||
`started`::: The {dfeed} is actively receiving data.
|
||||
`stopped`::: The {dfeed} is stopped and will not receive data until it is
|
||||
re-started.
|
||||
|
||||
Latest timestamp::
|
||||
The timestamp of the last processed record.
|
||||
|
||||
|
||||
If you click the arrow beside the name of job, you can show or hide additional
|
||||
information, such as the settings, configuration information, or messages for
|
||||
the job.
|
||||
|
||||
You can also click one of the **Actions** buttons to start the {dfeed}, edit
|
||||
the job or {dfeed}, and clone or delete the job, for example.
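
Much of this status information is also available through the APIs. For example, the following sketch retrieves the processed record counts, memory status, and state for the `total-requests` job:

[source,js]
--------------------------------------------------
GET _xpack/ml/anomaly_detectors/total-requests/_stats
--------------------------------------------------
// NOTCONSOLE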
|
||||
|
||||
[float]
|
||||
[[ml-gs-job1-datafeed]]
|
||||
==== Managing {dfeeds-cap}
|
||||
|
||||
A {dfeed} can be started and stopped multiple times throughout its lifecycle.
|
||||
If you want to retrieve more data from {es} and the {dfeed} is stopped, you must
|
||||
restart it.
|
||||
|
||||
For example, if you did not use the full data when you created the job, you can
|
||||
now process the remaining data by restarting the {dfeed}:
|
||||
|
||||
. In the **Machine Learning** / **Job Management** tab, click the following
|
||||
button to start the {dfeed}: image:images/ml-start-feed.jpg["Start {dfeed}"]
|
||||
|
||||
|
||||
. Choose a start time and end time. For example,
|
||||
click **Continue from 2017-04-01 23:59:00** and select **2017-04-30** as the
|
||||
search end time. Then click **Start**. The date picker defaults to the latest
|
||||
timestamp of processed data. Be careful not to leave any gaps in the analysis,
|
||||
otherwise you might miss anomalies. +
|
||||
+
|
||||
--
|
||||
[role="screenshot"]
|
||||
image::images/ml-gs-job1-datafeed.jpg["Restarting a {dfeed}"]
|
||||
--
|
||||
|
||||
The {dfeed} state changes to `started`, the job state changes to `opened`,
|
||||
and the number of processed records increases as the new data is analyzed. The
|
||||
latest timestamp information also increases.
|
||||
|
||||
TIP: If your data is being loaded continuously, you can continue running the job
|
||||
in real time. For this, start your {dfeed} and select **No end time**.
|
||||
|
||||
If you want to stop the {dfeed} at this point, you can click the following
|
||||
button: image:images/ml-stop-feed.jpg["Stop {dfeed}"]
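
The equivalent API calls look similar to the following sketch. It assumes that {kib} named the {dfeed} `datafeed-total-requests`, which follows the default `datafeed-<job ID>` convention; check the **Job Management** tab if yours differs:

[source,js]
--------------------------------------------------
POST _xpack/ml/datafeeds/datafeed-total-requests/_start
{
  "start": "2017-04-01T23:59:00Z",
  "end": "2017-04-30T00:00:00Z"
}

POST _xpack/ml/datafeeds/datafeed-total-requests/_stop
--------------------------------------------------
// NOTCONSOLE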
|
||||
|
||||
Now that you have processed all the data, let's start exploring the job results.
|
||||
|
||||
[[ml-gs-job1-analyze]]
|
||||
=== Exploring Single Metric Job Results
|
||||
|
||||
The {xpackml} features analyze the input stream of data, model its behavior,
|
||||
and perform analysis based on the detectors you defined in your job. When an
|
||||
event occurs outside of the model, that event is identified as an anomaly.
|
||||
|
||||
Result records for each anomaly are stored in `.ml-anomalies-*` indices in {es}.
|
||||
By default, the name of the index where {ml} results are stored is labelled
|
||||
`shared`, which corresponds to the `.ml-anomalies-shared` index.
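
Although this tutorial uses {kib} to explore the results, you can also retrieve them through the APIs. For example, the following sketch fetches the record results for the `total-requests` job, sorted by record score:

[source,js]
--------------------------------------------------
GET _xpack/ml/anomaly_detectors/total-requests/results/records
{
  "sort": "record_score",
  "desc": true
}
--------------------------------------------------
// NOTCONSOLE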
|
||||
|
||||
You can use the **Anomaly Explorer** or the **Single Metric Viewer** in {kib} to
|
||||
view the analysis results.
|
||||
|
||||
Anomaly Explorer::
|
||||
This view contains swim lanes showing the maximum anomaly score over time.
|
||||
There is an overall swim lane that shows the overall score for the job, and
|
||||
also swim lanes for each influencer. By selecting a block in a swim lane, the
|
||||
anomaly details are displayed alongside the original source data (where
|
||||
applicable).
|
||||
|
||||
Single Metric Viewer::
|
||||
This view contains a chart that represents the actual and expected values over
|
||||
time. This is only available for jobs that analyze a single time series and
|
||||
where `model_plot_config` is enabled. As in the **Anomaly Explorer**, anomalous
|
||||
data points are shown in different colors depending on their score.
|
||||
|
||||
By default when you view the results for a single metric job, the
|
||||
**Single Metric Viewer** opens:
|
||||
[role="screenshot"]
|
||||
image::images/ml-gs-job1-analysis.jpg["Single Metric Viewer for total-requests job"]
|
||||
|
||||
|
||||
The blue line in the chart represents the actual data values. The shaded blue
|
||||
area represents the bounds for the expected values. The area between the upper
|
||||
and lower bounds represents the most likely values for the model. If a value is outside
|
||||
of this area then it can be said to be anomalous.
|
||||
|
||||
If you slide the time selector from the beginning of the data to the end of the
|
||||
data, you can see how the model improves as it processes more data. At the
|
||||
beginning, the expected range of values is pretty broad and the model is not
|
||||
capturing the periodicity in the data. But it quickly learns and begins to
|
||||
reflect the daily variation.
|
||||
|
||||
Any data points outside the range that was predicted by the model are marked
|
||||
as anomalies. When you have high volumes of real-life data, many anomalies
|
||||
might be found. These vary in probability from very likely to highly unlikely,
|
||||
that is to say, from not particularly anomalous to highly anomalous. There
|
||||
can be none, just one or two, tens, or sometimes hundreds of anomalies found within
|
||||
each bucket. There can be many thousands found per job. In order to provide
|
||||
a sensible view of the results, an _anomaly score_ is calculated for each bucket
|
||||
time interval. The anomaly score is a value from 0 to 100, which indicates
|
||||
the significance of the observed anomaly compared to previously seen anomalies.
|
||||
The highly anomalous values are shown in red and the low scored values are
|
||||
indicated in blue. An interval with a high anomaly score is significant and
|
||||
requires investigation.
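
If you want to work with these bucket-level scores programmatically, a sketch like the following uses the get buckets API to return only the buckets for the `total-requests` job whose anomaly score is 75 or higher:

[source,js]
--------------------------------------------------
GET _xpack/ml/anomaly_detectors/total-requests/results/buckets
{
  "anomaly_score": 75
}
--------------------------------------------------
// NOTCONSOLE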
|
||||
|
||||
Slide the time selector to a section of the time series that contains a red
|
||||
anomaly data point. If you hover over the point, you can see more information
|
||||
about that data point. You can also see details in the **Anomalies** section
|
||||
of the viewer. For example:
|
||||
[role="screenshot"]
|
||||
image::images/ml-gs-job1-anomalies.jpg["Single Metric Viewer Anomalies for total-requests job"]
|
||||
|
||||
For each anomaly you can see key details such as the time, the actual and
|
||||
expected ("typical") values, and their probability.
|
||||
|
||||
By default, the table contains all anomalies that have a severity of "warning"
|
||||
or higher in the selected section of the timeline. If you are only interested in
|
||||
critical anomalies, for example, you can change the severity threshold for this
|
||||
table.
|
||||
|
||||
The anomalies table also automatically calculates an interval for the data in
|
||||
the table. If the time difference between the earliest and latest records in the
|
||||
table is less than two days, the data is aggregated by hour to show the details
|
||||
of the highest severity anomaly for each detector. Otherwise, it is
|
||||
aggregated by day. You can change the interval for the table, for example, to
|
||||
show all anomalies.
|
||||
|
||||
You can see the same information in a different format by using the
|
||||
**Anomaly Explorer**:
|
||||
[role="screenshot"]
|
||||
image::images/ml-gs-job1-explorer.jpg["Anomaly Explorer for total-requests job"]
|
||||
|
||||
|
||||
Click one of the red sections in the swim lane to see details about the anomalies
|
||||
that occurred in that time interval. For example:
|
||||
[role="screenshot"]
|
||||
image::images/ml-gs-job1-explorer-anomaly.jpg["Anomaly Explorer details for total-requests job"]
|
||||
|
||||
After you have identified anomalies, often the next step is to try to determine
|
||||
the context of those situations. For example, are there other factors that are
|
||||
contributing to the problem? Are the anomalies confined to particular
|
||||
applications or servers? You can begin to troubleshoot these situations by
|
||||
layering additional jobs or creating multi-metric jobs.
|
|
@ -0,0 +1,99 @@
|
|||
[[ml-gs-wizards]]
|
||||
=== Creating Jobs in {kib}
|
||||
++++
|
||||
<titleabbrev>Creating Jobs</titleabbrev>
|
||||
++++
|
||||
|
||||
Machine learning jobs contain the configuration information and metadata
|
||||
necessary to perform an analytical task. They also contain the results of the
|
||||
analytical task.
|
||||
|
||||
[NOTE]
|
||||
--
|
||||
This tutorial uses {kib} to create jobs and view results, but you can
|
||||
alternatively use APIs to accomplish most tasks.
|
||||
For API reference information, see {ref}/ml-apis.html[Machine Learning APIs].
|
||||
|
||||
The {xpackml} features in {kib} use pop-ups. You must configure your
|
||||
web browser so that it does not block pop-up windows, or create an
|
||||
exception for your {kib} URL.
|
||||
--
|
||||
|
||||
{kib} provides wizards that help you create typical {ml} jobs. For example, you
|
||||
can use wizards to create single metric, multi-metric, population, and advanced
|
||||
jobs.
|
||||
|
||||
To see the job creation wizards:
|
||||
|
||||
. Open {kib} in your web browser and log in. If you are running {kib} locally,
|
||||
go to `http://localhost:5601/`.
|
||||
|
||||
. Click **Machine Learning** in the side navigation.
|
||||
|
||||
. Click **Create new job**.
|
||||
|
||||
. Click the `server-metrics*` index pattern.
|
||||
|
||||
You can then choose from a list of job wizards. For example:
|
||||
|
||||
[role="screenshot"]
|
||||
image::images/ml-create-job.jpg["Job creation wizards in {kib}"]
|
||||
|
||||
If you are not certain which wizard to use, there is also a **Data Visualizer**
|
||||
that can help you explore the fields in your data.
|
||||
|
||||
To learn more about the sample data:
|
||||
|
||||
. Click **Data Visualizer**. +
|
||||
+
|
||||
--
|
||||
[role="screenshot"]
|
||||
image::images/ml-data-visualizer.jpg["Data Visualizer in {kib}"]
|
||||
--
|
||||
|
||||
. Select a time period that you're interested in exploring by using the time
|
||||
picker in the {kib} toolbar. Alternatively, click
|
||||
**Use full server-metrics* data** to view data over the full time range. In this
|
||||
sample data, the documents relate to March and April 2017.
|
||||
|
||||
. Optional: Change the number of documents per shard that are used in the
|
||||
visualizations. There is a relatively small number of documents in the sample
|
||||
data, so you can choose a value of `all`. For larger data sets, keep in mind
|
||||
that using a large sample size increases query run times and increases the load
|
||||
on the cluster.
|
||||
|
||||
[role="screenshot"]
|
||||
image::images/ml-data-metrics.jpg["Data Visualizer output for metrics in {kib}"]
|
||||
|
||||
The fields in the indices are listed in two sections. The first section contains
|
||||
the numeric ("metric") fields. The second section contains non-metric fields
|
||||
(such as `keyword`, `text`, `date`, `boolean`, `ip`, and `geo_point` data types).
|
||||
|
||||
For metric fields, the **Data Visualizer** indicates how many documents contain
|
||||
the field in the selected time period. It also provides information about the
|
||||
minimum, median, and maximum values, the number of distinct values, and their
|
||||
distribution. You can use the distribution chart to get a better idea of how
|
||||
the values in the data are clustered. Alternatively, you can view the top values
|
||||
for metric fields. For example:
|
||||
|
||||
[role="screenshot"]
|
||||
image::images/ml-data-topmetrics.jpg["Data Visualizer output for top values in {kib}"]
|
||||
|
||||
For date fields, the **Data Visualizer** provides the earliest and latest field
|
||||
values and the number and percentage of documents that contain the field
|
||||
during the selected time period. For example:
|
||||
|
||||
[role="screenshot"]
|
||||
image::images/ml-data-dates.jpg["Data Visualizer output for date fields in {kib}"]
|
||||
|
||||
For keyword fields, the **Data Visualizer** provides the number of distinct
|
||||
values, a list of the top values, and the number and percentage of documents
|
||||
that contain the field during the selected time period. For example:
|
||||
|
||||
[role="screenshot"]
|
||||
image::images/ml-data-keywords.jpg["Data Visualizer output for keyword fields in {kib}"]
|
||||
|
||||
In this tutorial, you will create single and multi-metric jobs that use the
|
||||
`total`, `response`, `service`, and `host` fields. Though there is an option to
|
||||
create an advanced job directly from the **Data Visualizer**, we will use the
|
||||
single and multi-metric job creation wizards instead.
|
|
@ -0,0 +1,80 @@
|
|||
[[ml-getting-started]]
|
||||
== Getting Started with Machine Learning
|
||||
++++
|
||||
<titleabbrev>Getting Started</titleabbrev>
|
||||
++++
|
||||
|
||||
Ready to get some hands-on experience with the {xpackml} features? This
|
||||
tutorial shows you how to:
|
||||
|
||||
* Load a sample data set into {es}
|
||||
* Create single and multi-metric {ml} jobs in {kib}
|
||||
* Use the results to identify possible anomalies in the data
|
||||
|
||||
At the end of this tutorial, you should have a good idea of what {ml} is and
|
||||
will hopefully be inspired to use it to detect anomalies in your own data.
|
||||
|
||||
You might also be interested in these video tutorials, which use the same sample
|
||||
data:
|
||||
|
||||
* https://www.elastic.co/videos/machine-learning-tutorial-creating-a-single-metric-job[Machine Learning for the Elastic Stack: Creating a single metric job]
|
||||
* https://www.elastic.co/videos/machine-learning-tutorial-creating-a-multi-metric-job[Machine Learning for the Elastic Stack: Creating a multi-metric job]
|
||||
|
||||
|
||||
[float]
|
||||
[[ml-gs-sysoverview]]
|
||||
=== System Overview
|
||||
|
||||
To follow the steps in this tutorial, you will need the following
|
||||
components of the Elastic Stack:
|
||||
|
||||
* {es} {version}, which stores the data and the analysis results
|
||||
* {kib} {version}, which provides a helpful user interface for creating and
|
||||
viewing jobs
|
||||
|
||||
See the https://www.elastic.co/support/matrix[Elastic Support Matrix] for
|
||||
information about supported operating systems.
|
||||
|
||||
See {stack-ref}/installing-elastic-stack.html[Installing the Elastic Stack] for
|
||||
information about installing each of the components.
|
||||
|
||||
NOTE: To get started, you can install {es} and {kib} on a
|
||||
single VM or even on your laptop (requires 64-bit OS).
|
||||
As you add more data and your traffic grows,
|
||||
you'll want to replace the single {es} instance with a cluster.
|
||||
|
||||
By default, when you install {es} and {kib}, {xpack} is installed and the
|
||||
{ml} features are enabled. You cannot use {ml} with the free basic license, but
|
||||
you can try all of the {xpack} features with a <<license-management,trial license>>.
|
||||
|
||||
If you have multiple nodes in your cluster, you can optionally dedicate nodes to
|
||||
specific purposes. If you want to control which nodes are
|
||||
_machine learning nodes_ or limit which nodes run resource-intensive
|
||||
activity related to jobs, see <<xpack-settings>>.
|
||||
|
||||
[float]
|
||||
[[ml-gs-users]]
|
||||
==== Users, Roles, and Privileges
|
||||
|
||||
The {xpackml} features implement cluster privileges and built-in roles to
|
||||
make it easier to control which users have authority to view and manage the jobs,
|
||||
{dfeeds}, and results.
|
||||
|
||||
By default, you can perform all of the steps in this tutorial by using the
|
||||
built-in `elastic` super user. However, the password must be set before the user
|
||||
can do anything. For information about how to set that password, see
|
||||
<<security-getting-started>>.
|
||||
|
||||
If you are performing these steps in a production environment, take extra care
|
||||
because `elastic` has the `superuser` role and you could inadvertently make
|
||||
significant changes to the system. You can alternatively assign the
|
||||
`machine_learning_admin` and `kibana_user` roles to a user ID of your choice.
|
||||
|
||||
For more information, see <<built-in-roles>> and <<privileges-list-cluster>>.
|
||||
|
||||
include::getting-started-data.asciidoc[]
|
||||
include::getting-started-wizards.asciidoc[]
|
||||
include::getting-started-single.asciidoc[]
|
||||
include::getting-started-multi.asciidoc[]
|
||||
include::getting-started-forecast.asciidoc[]
|
||||
include::getting-started-next.asciidoc[]
|
After Width: | Height: | Size: 118 KiB |
After Width: | Height: | Size: 347 KiB |
After Width: | Height: | Size: 70 KiB |
After Width: | Height: | Size: 187 KiB |
After Width: | Height: | Size: 36 KiB |
After Width: | Height: | Size: 97 KiB |
After Width: | Height: | Size: 17 KiB |
After Width: | Height: | Size: 17 KiB |
After Width: | Height: | Size: 350 KiB |
After Width: | Height: | Size: 99 KiB |
After Width: | Height: | Size: 75 KiB |
After Width: | Height: | Size: 1.9 KiB |
After Width: | Height: | Size: 26 KiB |
After Width: | Height: | Size: 30 KiB |
After Width: | Height: | Size: 304 KiB |
After Width: | Height: | Size: 53 KiB |
After Width: | Height: | Size: 293 KiB |
After Width: | Height: | Size: 286 KiB |
After Width: | Height: | Size: 92 KiB |
After Width: | Height: | Size: 262 KiB |
After Width: | Height: | Size: 398 KiB |
After Width: | Height: | Size: 133 KiB |
After Width: | Height: | Size: 157 KiB |
After Width: | Height: | Size: 236 KiB |
After Width: | Height: | Size: 134 KiB |
After Width: | Height: | Size: 154 KiB |
After Width: | Height: | Size: 218 KiB |
After Width: | Height: | Size: 175 KiB |
After Width: | Height: | Size: 245 KiB |
After Width: | Height: | Size: 249 KiB |
After Width: | Height: | Size: 64 KiB |
After Width: | Height: | Size: 122 KiB |
After Width: | Height: | Size: 230 KiB |
After Width: | Height: | Size: 141 KiB |
After Width: | Height: | Size: 84 KiB |
After Width: | Height: | Size: 87 KiB |
After Width: | Height: | Size: 176 KiB |
After Width: | Height: | Size: 96 KiB |
After Width: | Height: | Size: 205 KiB |
After Width: | Height: | Size: 100 KiB |
After Width: | Height: | Size: 1.3 KiB |
After Width: | Height: | Size: 4.5 KiB |
After Width: | Height: | Size: 90 KiB |
|
@ -0,0 +1,27 @@
|
|||
[[xpack-ml]]
|
||||
= Machine Learning in the Elastic Stack
|
||||
|
||||
[partintro]
|
||||
--
|
||||
Machine learning is tightly integrated with the Elastic Stack. Data is pulled
|
||||
from {es} for analysis and anomaly results are displayed in {kib} dashboards.
|
||||
|
||||
* <<ml-overview>>
|
||||
* <<ml-getting-started>>
|
||||
* <<ml-configuring>>
|
||||
* <<stopping-ml>>
|
||||
* <<ml-troubleshooting, Troubleshooting Machine Learning>>
|
||||
* <<ml-api-quickref>>
|
||||
* <<ml-functions>>
|
||||
|
||||
|
||||
--
|
||||
|
||||
include::overview.asciidoc[]
|
||||
include::getting-started.asciidoc[]
|
||||
include::configuring.asciidoc[]
|
||||
include::stopping-ml.asciidoc[]
|
||||
// include::ml-scenarios.asciidoc[]
|
||||
include::api-quickref.asciidoc[]
|
||||
//include::troubleshooting.asciidoc[] Referenced from x-pack/docs/public/xpack-troubleshooting.asciidoc
|
||||
include::functions.asciidoc[]
|
|
@ -0,0 +1,33 @@
|
|||
[[ml-jobs]]
|
||||
=== Machine Learning Jobs
|
||||
++++
|
||||
<titleabbrev>Jobs</titleabbrev>
|
||||
++++
|
||||
|
||||
Machine learning jobs contain the configuration information and metadata
|
||||
necessary to perform an analytics task.
|
||||
|
||||
Each job has one or more _detectors_. A detector applies an analytical function
|
||||
to specific fields in your data. For more information about the types of
|
||||
analysis you can perform, see <<ml-functions>>.
|
||||
|
||||
A job can also contain properties that affect which types of entities or events
|
||||
are considered anomalous. For example, you can specify whether entities are
|
||||
analyzed relative to their own previous behavior or relative to other entities
|
||||
in a population. There are also multiple options for splitting the data into
|
||||
categories and partitions. Some of these more advanced job configurations
|
||||
are described in the following section: <<ml-configuring>>.
|
||||
|
||||
For a description of all the job properties, see
|
||||
{ref}/ml-job-resource.html[Job Resources].
|
||||
|
||||
In {kib}, there are wizards that help you create specific types of jobs, such
|
||||
as _single metric_, _multi-metric_, and _population_ jobs. A single metric job
|
||||
is just a job with a single detector and limited job properties. To have access
|
||||
to all of the job properties in {kib}, you must choose the _advanced_ job wizard.
|
||||
If you want to try creating single and multi-metrics jobs in {kib} with sample
|
||||
data, see <<ml-getting-started>>.
|
||||
|
||||
You can also optionally assign jobs to one or more _job groups_. You can use
|
||||
job groups to view the results from multiple jobs more easily and to expedite
|
||||
administrative tasks by opening or closing multiple jobs at once.
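
As a rough sketch of how these pieces fit together in the APIs, the following request creates a hypothetical single-detector job and assigns it to a job group (the job ID, group name, and field names are illustrative):

[source,js]
--------------------------------------------------
PUT _xpack/ml/anomaly_detectors/example-total-requests
{
  "groups": [ "tutorial" ],
  "analysis_config": {
    "bucket_span": "10m",
    "detectors": [
      { "function": "sum", "field_name": "total" }
    ]
  },
  "data_description": {
    "time_field": "@timestamp"
  }
}
--------------------------------------------------
// NOTCONSOLE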
|