Breaks site
This commit is contained in:
parent
6150e42eec
commit
2729ce3ccb
52
_config.yml
52
_config.yml
|
@ -1,18 +1,3 @@
|
|||
# Welcome to Jekyll!
|
||||
#
|
||||
# This config file is meant for settings that affect your whole blog, values
|
||||
# which you are expected to set up once and rarely edit after that. If you find
|
||||
# yourself editing this file very often, consider using Jekyll's data files
|
||||
# feature for the data you need to update frequently.
|
||||
#
|
||||
# For technical reasons, this file is *NOT* reloaded automatically when you use
|
||||
# 'bundle exec jekyll serve'. If you change this file, please restart the server process.
|
||||
|
||||
# Site settings
|
||||
# These are used to personalize your new site. If you look in the HTML files,
|
||||
# you will see them accessed via {{ site.title }}, {{ site.email }}, and so on.
|
||||
# You can create any custom variable you would like, and they will be accessible
|
||||
# in the templates via {{ site.myvariable }}.
|
||||
title: OpenSearch documentation
|
||||
description: >- # this means to ignore newlines until "baseurl:"
|
||||
Documentation for OpenSearch, the Apache 2.0 search, analytics, and visualization suite with advanced security, alerting, SQL support, automated index management, deep performance analysis, and more.
|
||||
|
@ -39,6 +24,43 @@ aux_links:
|
|||
- "https://opensearch.org"
|
||||
color_scheme: opensearch
|
||||
|
||||
# Define Jekyll collections
|
||||
collections:
|
||||
# Define one collection per documentation area; by default, a collection's documents reside in a directory named after it with a leading underscore (e.g. "_opensearch_docs")
|
||||
opensearch_docs:
|
||||
permalink: "/:collection/:path/"
|
||||
output: true
|
||||
opensearch_dashboards_docs:
|
||||
permalink: "/:collection/:path/"
|
||||
output: true
|
||||
opensearch_plugins_docs:
|
||||
permalink: "/:collection/:path/"
|
||||
output: true
|
||||
external_links:
|
||||
permalink: "/:collection/:path/"
|
||||
output: true
|
||||
|
||||
just_the_docs:
|
||||
# Define the collections used in the theme
|
||||
collections:
|
||||
opensearch_docs:
|
||||
name: OpenSearch
|
||||
# nav_exclude: true
|
||||
# nav_fold: true
|
||||
# search_exclude: true
|
||||
opensearch_dashboards_docs:
|
||||
name: OpenSearch Dashboards
|
||||
#nav_fold: true
|
||||
opensearch_plugins_docs:
|
||||
name: Plugins
|
||||
#nav_fold: true
|
||||
opensearch_troubleshooting_docs:
|
||||
name: Troubleshooting
|
||||
#nav_fold: true
|
||||
external_links:
|
||||
name: External links
|
||||
|
||||
|
||||
# Enable or disable the site search
|
||||
# Supports true (default) or false
|
||||
search_enabled: true
|
||||
|
|
|
@ -0,0 +1,7 @@
|
|||
---
|
||||
layout: default
|
||||
title: Javadoc
|
||||
nav_order: 1
|
||||
permalink: /javadoc/
|
||||
redirect_to: https://opensearch.org/docs/javadocs/
|
||||
---
|
|
@ -60,7 +60,7 @@
|
|||
{%- for node in pages_list -%}
|
||||
{%- if node.parent == nil -%}
|
||||
{%- unless node.nav_exclude -%}
|
||||
<li class="nav-list-item{% if page.url == node.url or page.parent == node.title or page.grand_parent == node.title %} active{% endif %}">
|
||||
<li class="nav-list-item{% if page.collection == include.key and page.url == node.url or page.parent == node.title or page.grand_parent == node.title %} active{% endif %}">
|
||||
{%- if node.has_children -%}
|
||||
<a href="#" class="nav-list-expander"><svg viewBox="0 0 24 24"><use xlink:href="#svg-arrow-right"></use></svg></a>
|
||||
{%- endif -%}
|
||||
|
@ -90,14 +90,36 @@
|
|||
</li>
|
||||
{%- endunless -%}
|
||||
{%- endfor -%}
|
||||
|
||||
</ul>
|
||||
{%- endif -%}
|
||||
</li>
|
||||
{%- endunless -%}
|
||||
{%- endif -%}
|
||||
{%- endfor -%}
|
||||
<li class="nav-list-item">
|
||||
<a href="https://opensearch.org/docs/javadocs/" target="_blank" class="nav-list-link">Javadoc <svg class="external-arrow" width="16" height="16" fill="#002A3A"><use xlink:href="#external-arrow"></use></svg></a>
|
||||
</li>
|
||||
</ul>
|
||||
|
||||
{%- if page.collection == include.key -%}
|
||||
|
||||
{%- for node in pages_list -%}
|
||||
{%- if node.parent == nil -%}
|
||||
{%- if page.parent == node.title or page.grand_parent == node.title -%}
|
||||
{%- assign first_level_url = node.url | absolute_url -%}
|
||||
{%- endif -%}
|
||||
{%- if node.has_children -%}
|
||||
{%- assign children_list = pages_list | where: "parent", node.title -%}
|
||||
{%- for child in children_list -%}
|
||||
{%- if child.has_children -%}
|
||||
{%- if page.url == child.url or page.parent == child.title and page.grand_parent == child.parent -%}
|
||||
{%- assign second_level_url = child.url | absolute_url -%}
|
||||
{%- endif -%}
|
||||
{%- endif -%}
|
||||
{%- endfor -%}
|
||||
{%- endif -%}
|
||||
{%- endif -%}
|
||||
{%- endfor -%}
|
||||
|
||||
{% if page.has_children == true and page.has_toc != false %}
|
||||
{%- assign toc_list = pages_list | where: "parent", page.title | where: "grand_parent", page.parent -%}
|
||||
{%- endif -%}
|
||||
|
||||
{%- endif -%}
|
||||
|
|
|
@ -38,13 +38,6 @@ layout: table_wrappers
|
|||
<path d="M13 2H6a2 2 0 0 0-2 2v16a2 2 0 0 0 2 2h12a2 2 0 0 0 2-2V9z"></path><polyline points="13 2 13 9 20 9"></polyline>
|
||||
</svg>
|
||||
</symbol>
|
||||
<symbol id="external-arrow" viewBox="0 0 16 16">
|
||||
<title>External</title>
|
||||
<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" fill="currentColor" viewBox="0 0 16 16">
|
||||
<path fill-rule="evenodd" d="M8.636 3.5a.5.5 0 0 0-.5-.5H1.5A1.5 1.5 0 0 0 0 4.5v10A1.5 1.5 0 0 0 1.5 16h10a1.5 1.5 0 0 0 1.5-1.5V7.864a.5.5 0 0 0-1 0V14.5a.5.5 0 0 1-.5.5h-10a.5.5 0 0 1-.5-.5v-10a.5.5 0 0 1 .5-.5h6.636a.5.5 0 0 0 .5-.5z"/>
|
||||
<path fill-rule="evenodd" d="M16 .5a.5.5 0 0 0-.5-.5h-5a.5.5 0 0 0 0 1h3.793L6.146 9.146a.5.5 0 1 0 .708.708L15 1.707V5.5a.5.5 0 0 0 1 0v-5z"/>
|
||||
</svg>
|
||||
</symbol>
|
||||
</svg>
|
||||
|
||||
<div class="side-bar">
|
||||
|
@ -55,6 +48,14 @@ layout: table_wrappers
|
|||
</a>
|
||||
</div>
|
||||
<nav role="navigation" aria-label="Main" id="site-nav" class="site-nav">
|
||||
{% assign pages_top_size = site.html_pages
|
||||
| where_exp:"item", "item.title != nil"
|
||||
| where_exp:"item", "item.parent == nil"
|
||||
| where_exp:"item", "item.nav_exclude != true"
|
||||
| size %}
|
||||
{% if pages_top_size > 0 %}
|
||||
{% include nav.html pages=site.html_pages key=nil %}
|
||||
{% endif %}
|
||||
{% if site.just_the_docs.collections %}
|
||||
{% assign collections_size = site.just_the_docs.collections | size %}
|
||||
{% for collection_entry in site.just_the_docs.collections %}
|
||||
|
@ -62,14 +63,26 @@ layout: table_wrappers
|
|||
{% assign collection_value = collection_entry[1] %}
|
||||
{% assign collection = site[collection_key] %}
|
||||
{% if collection_value.nav_exclude != true %}
|
||||
{% if collections_size > 1 %}
|
||||
{% if collections_size > 1 or pages_top_size > 0 %}
|
||||
{% if collection_value.nav_fold == true %}
|
||||
<ul class="nav-list nav-category-list">
|
||||
<li class="nav-list-item{% if page.collection == collection_key %} active{% endif %}">
|
||||
{%- if collection.size > 0 -%}
|
||||
<a href="#" class="nav-list-expander"><svg viewBox="0 0 24 24"><use xlink:href="#svg-arrow-right"></use></svg></a>
|
||||
{%- endif -%}
|
||||
<div class="nav-category">{{ collection_value.name }}</div>
|
||||
{% include nav.html pages=collection key=collection_key %}
|
||||
</li>
|
||||
</ul>
|
||||
{% else %}
|
||||
<div class="nav-category">{{ collection_value.name }}</div>
|
||||
{% include nav.html pages=collection key=collection_key %}
|
||||
{% endif %}
|
||||
{% else %}
|
||||
{% include nav.html pages=collection key=collection_key %}
|
||||
{% endif %}
|
||||
{% include nav.html pages=collection %}
|
||||
{% endif %}
|
||||
{% endfor %}
|
||||
{% else %}
|
||||
{% include nav.html pages=site.html_pages %}
|
||||
{% endif %}
|
||||
</nav>
|
||||
<footer class="site-footer">
|
||||
|
@ -110,21 +123,6 @@ layout: table_wrappers
|
|||
<div id="main-content-wrap" class="main-content-wrap">
|
||||
{% unless page.url == "/" %}
|
||||
{% if page.parent %}
|
||||
{%- for node in pages_list -%}
|
||||
{%- if node.parent == nil -%}
|
||||
{%- if page.parent == node.title or page.grand_parent == node.title -%}
|
||||
{%- assign first_level_url = node.url | absolute_url -%}
|
||||
{%- endif -%}
|
||||
{%- if node.has_children -%}
|
||||
{%- assign children_list = pages_list | where: "parent", node.title -%}
|
||||
{%- for child in children_list -%}
|
||||
{%- if page.url == child.url or page.parent == child.title -%}
|
||||
{%- assign second_level_url = child.url | absolute_url -%}
|
||||
{%- endif -%}
|
||||
{%- endfor -%}
|
||||
{%- endif -%}
|
||||
{%- endif -%}
|
||||
{%- endfor -%}
|
||||
<nav aria-label="Breadcrumb" class="breadcrumb-nav">
|
||||
<ol class="breadcrumb-nav-list">
|
||||
{% if page.grand_parent %}
|
||||
|
@ -142,7 +140,6 @@ layout: table_wrappers
|
|||
{% if site.heading_anchors != false %}
|
||||
{% include vendor/anchor_headings.html html=content beforeHeading="true" anchorBody="<svg viewBox=\"0 0 16 16\" aria-hidden=\"true\"><use xlink:href=\"#svg-link\"></use></svg>" anchorClass="anchor-heading" anchorAttrs="aria-labelledby=\"%html_id%\"" %}
|
||||
{% else %}
|
||||
<p class="warning" style="margin-top: 0">Like OpenSearch itself, this documentation is a beta. It has content gaps and might contain bugs.</p>
|
||||
{{ content }}
|
||||
{% endif %}
|
||||
|
||||
|
@ -150,8 +147,7 @@ layout: table_wrappers
|
|||
<hr>
|
||||
<h2 class="text-delta">Table of contents</h2>
|
||||
<ul>
|
||||
{%- assign children_list = pages_list | where: "parent", page.title | where: "grand_parent", page.parent -%}
|
||||
{% for child in children_list %}
|
||||
{% for child in toc_list %}
|
||||
<li>
|
||||
<a href="{{ child.url | absolute_url }}">{{ child.title }}</a>{% if child.summary %} - {{ child.summary }}{% endif %}
|
||||
</li>
|
||||
|
|
|
@ -0,0 +1,26 @@
|
|||
---
|
||||
layout: default
|
||||
title: Gantt charts
|
||||
nav_order: 10
|
||||
has_toc: false
|
||||
---
|
||||
|
||||
# Gantt charts
|
||||
|
||||
OpenSearch Dashboards includes a Gantt chart visualization. Gantt charts show the start, end, and duration of unique events in a sequence. Gantt charts are useful in trace analytics, telemetry, and anomaly detection use cases, where you want to understand interactions and dependencies between various events in a schedule.
|
||||
|
||||
For example, consider an index of log data. The fields in a typical set of log data, especially audit logs, contain a specific operation or event with a start time and duration.
|
||||
|
||||
To create a Gantt chart, perform the following steps:
|
||||
|
||||
1. In the visualizations menu, choose **Create visualization** and **Gantt Chart**.
|
||||
1. Choose a source for the chart (e.g. some log data).
|
||||
1. Under **Metrics**, choose **Event**. For log data, each log is an event.
|
||||
1. Select the **Start Time** and **Duration** fields from your data set. The start time is the timestamp for the beginning of an event. The duration is the amount of time to add to the start time.
|
||||
1. Under **Results**, choose the number of events to display on the chart. Gantt charts sequence events from earliest to latest based on start time.
|
||||
1. Choose **Panel settings** to adjust axis labels, time format, and colors.
|
||||
1. Choose **Update**.
|
||||
|
||||
![Gantt Chart](../../images/gantt-chart.png)
|
||||
|
||||
This Gantt chart displays the ID of each log on the y-axis. Each bar is a unique event that spans some amount of time. Hover over a bar to see the duration of that event.
|
|
@ -0,0 +1,18 @@
|
|||
---
|
||||
layout: default
|
||||
title: Introduction to Dashboards
|
||||
nav_order: 1
|
||||
---
|
||||
|
||||
# About OpenSearch Dashboards
|
||||
|
||||
OpenSearch Dashboards is the default visualization tool for data in OpenSearch. It also serves as a user interface for many of the OpenSearch plugins, including security, alerting, Index State Management, SQL, and more.
|
||||
|
||||
|
||||
## Get started with OpenSearch Dashboards
|
||||
|
||||
1. After starting OpenSearch Dashboards, you can access it at port 5601. For example, http://localhost:5601.
|
||||
1. Log in with the default username `admin` and password `admin`.
|
||||
1. Choose **Try our sample data** and add the sample flight data.
|
||||
1. Choose **Discover** and search for a few flights.
|
||||
1. Choose **Dashboard**, **[Flights] Global Flight Dashboard**, and wait for the dashboard to load.
|
|
@ -0,0 +1,23 @@
|
|||
---
|
||||
layout: default
|
||||
title: Docker
|
||||
parent: Install OpenSearch Dashboards
|
||||
nav_order: 1
|
||||
---
|
||||
|
||||
# Run OpenSearch Dashboards using Docker
|
||||
|
||||
You *can* start OpenSearch Dashboards using `docker run` after [creating a Docker network](https://docs.docker.com/engine/reference/commandline/network_create/) and starting OpenSearch, but the process of connecting OpenSearch Dashboards to OpenSearch is significantly easier with a Docker Compose file.
|
||||
|
||||
1. Run `docker pull opensearchproject/opensearch-dashboards:{{site.opensearch_version}}`.
|
||||
|
||||
1. Create a [`docker-compose.yml`](https://docs.docker.com/compose/compose-file/) file appropriate for your environment. A sample file that includes OpenSearch Dashboards is available on the OpenSearch [Docker installation page](../opensearch/docker/#sample-docker-compose-file).
|
||||
|
||||
Just like `opensearch.yml`, you can pass a custom `opensearch_dashboards.yml` to the container in the Docker Compose file.
|
||||
{: .tip }
|
||||
|
||||
1. Run `docker-compose up`.
|
||||
|
||||
Wait for the containers to start. Then see the [OpenSearch Dashboards documentation](../../../opensearch-dashboards/).
|
||||
|
||||
1. When finished, run `docker-compose down`.
|
|
@ -0,0 +1,10 @@
|
|||
---
|
||||
layout: default
|
||||
title: Install OpenSearch Dashboards
|
||||
nav_order: 2
|
||||
has_children: true
|
||||
---
|
||||
|
||||
# Install and configure OpenSearch Dashboards
|
||||
|
||||
OpenSearch Dashboards has two installation options at this time: Docker images and tarballs.
|
|
@ -0,0 +1,205 @@
|
|||
---
|
||||
layout: default
|
||||
title: OpenSearch Dashboards plugins
|
||||
parent: Install OpenSearch Dashboards
|
||||
nav_order: 50
|
||||
---
|
||||
|
||||
# Standalone plugin install
|
||||
|
||||
If you don't want to use the all-in-one installation options, you can install the various plugins for OpenSearch Dashboards individually.
|
||||
|
||||
---
|
||||
|
||||
#### Table of contents
|
||||
1. TOC
|
||||
{:toc}
|
||||
|
||||
|
||||
---
|
||||
|
||||
## Plugin compatibility
|
||||
|
||||
<table>
|
||||
<thead style="text-align: left">
|
||||
<tr>
|
||||
<th>OpenSearch Dashboards version</th>
|
||||
<th>Plugin versions</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr>
|
||||
<td>1.0.0-beta1</td>
|
||||
<td>
|
||||
<pre>
|
||||
alertingDashboards 1.0.0.0-beta1
|
||||
anomalyDetectionDashboards 1.0.0.0-beta1
|
||||
ganttChartDashboards 1.0.0.0-beta1
|
||||
indexManagementDashboards 1.0.0.0-beta1
|
||||
notebooksDashboards 1.0.0.0-beta1
|
||||
queryWorkbenchDashboards 1.0.0.0-beta1
|
||||
reportsDashboards 1.0.0.0-beta1
|
||||
securityDashboards 1.0.0.0-beta1
|
||||
traceAnalyticsDashboards 1.0.0.0-beta1
|
||||
</pre>
|
||||
</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
|
||||
|
||||
## Prerequisites
|
||||
|
||||
- A compatible OpenSearch cluster
|
||||
- The corresponding OpenSearch plugins [installed on that cluster](../../install/plugins)
|
||||
- The corresponding version of [OpenSearch Dashboards](../) (e.g. OpenSearch Dashboards 1.0.0 works with OpenSearch 1.0.0)
|
||||
|
||||
|
||||
## Install
|
||||
|
||||
Navigate to the OpenSearch Dashboards home directory (likely `/usr/share/opensearch-dashboards`) and run the install command for each plugin.
|
||||
|
||||
|
||||
#### Security OpenSearch Dashboards
|
||||
|
||||
```bash
|
||||
sudo bin/opensearch-dashboards-plugin install https://d3g5vo6xdbdb9a.cloudfront.net/downloads/opensearch-dashboards-plugins/opensearch-security/opensearchSecurityOpenSearch Dashboards-{{site.opensearch_major_minor_version}}.0.1.zip
|
||||
```
|
||||
|
||||
This plugin provides a user interface for managing users, roles, mappings, action groups, and tenants.
|
||||
|
||||
|
||||
#### Alerting OpenSearch Dashboards
|
||||
|
||||
```bash
|
||||
sudo bin/opensearch-dashboards-plugin install https://d3g5vo6xdbdb9a.cloudfront.net/downloads/opensearch-dashboards-plugins/opensearch-alerting/opensearchAlertingOpenSearch Dashboards-{{site.opensearch_major_minor_version}}.0.0.zip
|
||||
```
|
||||
|
||||
This plugin provides a user interface for creating monitors and managing alerts.
|
||||
|
||||
|
||||
#### Index State Management OpenSearch Dashboards
|
||||
|
||||
```bash
|
||||
sudo bin/opensearch-dashboards-plugin install https://d3g5vo6xdbdb9a.cloudfront.net/downloads/opensearch-dashboards-plugins/opensearch-index-management/opensearchIndexManagementOpenSearch Dashboards-{{site.opensearch_major_minor_version}}.0.1.zip
|
||||
```
|
||||
|
||||
This plugin provides a user interface for managing policies.
|
||||
|
||||
|
||||
#### Anomaly Detection OpenSearch Dashboards
|
||||
|
||||
```bash
|
||||
sudo bin/opensearch-dashboards-plugin install https://d3g5vo6xdbdb9a.cloudfront.net/downloads/opensearch-dashboards-plugins/opensearch-anomaly-detection/opensearchAnomalyDetectionOpenSearch Dashboards-{{site.opensearch_major_minor_version}}.0.0.zip
|
||||
```
|
||||
|
||||
This plugin provides a user interface for adding detectors.
|
||||
|
||||
|
||||
#### Query Workbench OpenSearch Dashboards
|
||||
|
||||
```bash
|
||||
sudo bin/opensearch-dashboards-plugin install https://d3g5vo6xdbdb9a.cloudfront.net/downloads/opensearch-dashboards-plugins/opensearch-query-workbench/opensearchQueryWorkbenchOpenSearch Dashboards-{{site.opensearch_major_minor_version}}.0.0.zip
|
||||
```
|
||||
|
||||
This plugin provides a user interface for using SQL queries to explore your data.
|
||||
|
||||
|
||||
#### Trace Analytics
|
||||
|
||||
```bash
|
||||
sudo bin/opensearch-dashboards-plugin install https://d3g5vo6xdbdb9a.cloudfront.net/downloads/opensearch-dashboards-plugins/opensearch-trace-analytics/opensearchTraceAnalyticsOpenSearch Dashboards-{{site.opensearch_major_minor_version}}.2.0.zip
|
||||
```
|
||||
|
||||
This plugin uses distributed trace data (indexed in OpenSearch using Data Prepper) to display latency trends, error rates, and more.
|
||||
|
||||
|
||||
#### Notebooks OpenSearch Dashboards
|
||||
|
||||
```bash
|
||||
sudo bin/opensearch-dashboards-plugin install https://d3g5vo6xdbdb9a.cloudfront.net/downloads/opensearch-dashboards-plugins/opensearch-notebooks/opensearchNotebooksOpenSearch Dashboards-{{site.opensearch_major_minor_version}}.2.0.zip
|
||||
```
|
||||
|
||||
This plugin lets you combine OpenSearch Dashboards visualizations and narrative text in a single interface.
|
||||
|
||||
|
||||
#### Reports OpenSearch Dashboards
|
||||
|
||||
```bash
|
||||
# x86 Linux
|
||||
sudo bin/opensearch-dashboards-plugin install https://d3g5vo6xdbdb9a.cloudfront.net/downloads/opensearch-dashboards-plugins/opensearch-reports/linux/x64/opensearchReportsOpenSearch Dashboards-{{site.opensearch_major_minor_version}}.2.0-linux-x64.zip
|
||||
# ARM64 Linux
|
||||
sudo bin/opensearch-dashboards-plugin install https://d3g5vo6xdbdb9a.cloudfront.net/downloads/opensearch-dashboards-plugins/opensearch-reports/linux/arm64/opensearchReportsOpenSearch Dashboards-{{site.opensearch_major_minor_version}}.2.0-linux-arm64.zip
|
||||
# x86 Windows
|
||||
sudo bin/opensearch-dashboards-plugin install https://d3g5vo6xdbdb9a.cloudfront.net/downloads/opensearch-dashboards-plugins/opensearch-reports/windows/x64/opensearchReportsOpenSearch Dashboards-{{site.opensearch_major_minor_version}}.2.0-windows-x64.zip
|
||||
```
|
||||
|
||||
This plugin lets you export and share reports from OpenSearch Dashboards dashboards, visualizations, and saved searches.
|
||||
|
||||
|
||||
#### Gantt Chart OpenSearch Dashboards
|
||||
|
||||
```bash
|
||||
sudo bin/opensearch-dashboards-plugin install https://d3g5vo6xdbdb9a.cloudfront.net/downloads/opensearch-dashboards-plugins/opensearch-gantt-chart/opensearchGanttChartOpenSearch Dashboards-{{site.opensearch_major_minor_version}}.0.0.zip
|
||||
```
|
||||
|
||||
This plugin adds a new Gantt chart visualization.
|
||||
|
||||
|
||||
## List installed plugins
|
||||
|
||||
To check your installed plugins:
|
||||
|
||||
```bash
|
||||
sudo bin/opensearch-dashboards-plugin list
|
||||
```
|
||||
|
||||
|
||||
## Remove plugins
|
||||
|
||||
To remove a plugin:
|
||||
|
||||
```bash
|
||||
sudo bin/opensearch-dashboards-plugin remove <plugin-name>
|
||||
```
|
||||
|
||||
For certain plugins, you must also remove the "optimize" bundle. This is a sample command for the Anomaly Detection plugin:
|
||||
|
||||
```bash
|
||||
sudo rm /usr/share/opensearch-dashboards/optimize/bundles/opensearch-anomaly-detection-opensearch-dashboards.*
|
||||
```
|
||||
|
||||
Then restart OpenSearch Dashboards. After you remove any plugin, OpenSearch Dashboards performs an optimize operation the next time you start it. This operation takes several minutes even on fast machines, so be patient.
|
||||
|
||||
|
||||
## Update plugins
|
||||
|
||||
OpenSearch Dashboards doesn’t update plugins. Instead, you have to remove the old version and its optimized bundle, reinstall them, and restart OpenSearch Dashboards:
|
||||
|
||||
1. Remove the old version:
|
||||
|
||||
```bash
|
||||
sudo bin/opensearch-dashboards-plugin remove <plugin-name>
|
||||
```
|
||||
|
||||
1. Remove the optimized bundle:
|
||||
|
||||
```bash
|
||||
sudo rm /usr/share/opensearch-dashboards/optimize/bundles/<bundle-name>
|
||||
```
|
||||
|
||||
1. Reinstall the new version:
|
||||
|
||||
```bash
|
||||
sudo bin/opensearch-dashboards-plugin install <plugin-name>
|
||||
```
|
||||
|
||||
1. Restart OpenSearch Dashboards.
|
||||
|
||||
For example, to remove and reinstall the anomaly detection plugin:
|
||||
|
||||
```bash
|
||||
sudo bin/opensearch-dashboards-plugin remove opensearch-anomaly-detection
|
||||
sudo rm /usr/share/opensearch-dashboards/optimize/bundles/opensearch-anomaly-detection-opensearch-dashboards.*
|
||||
sudo bin/opensearch-dashboards-plugin install <AD OpenSearch Dashboards plugin artifact URL>
|
||||
```
|
|
@ -0,0 +1,30 @@
|
|||
---
|
||||
layout: default
|
||||
title: Tarball
|
||||
parent: Install OpenSearch Dashboards
|
||||
nav_order: 30
|
||||
---
|
||||
|
||||
# Run OpenSearch Dashboards using the tarball
|
||||
|
||||
1. Download the tarball from the [OpenSearch downloads page](https://opensearch.org/downloads.html){:target='\_blank'}.
|
||||
|
||||
1. Extract the TAR file to a directory and change to that directory:
|
||||
|
||||
```bash
|
||||
# x64
|
||||
tar -zxf opensearch-dashboards-{{site.opensearch_version}}-linux-x64.tar.gz
|
||||
cd opensearch-dashboards{% comment %}# ARM64
|
||||
tar -zxf opensearch-dashboards-{{site.opensearch_version}}-linux-arm64.tar.gz
|
||||
cd opensearch-dashboards{% endcomment %}
|
||||
```
|
||||
|
||||
1. If desired, modify `config/opensearch_dashboards.yml`.
|
||||
|
||||
1. Run OpenSearch Dashboards:
|
||||
|
||||
```bash
|
||||
./bin/opensearch-dashboards
|
||||
```
|
||||
|
||||
1. See the [OpenSearch Dashboards documentation](../../opensearch-dashboards/).
|
|
@ -0,0 +1,29 @@
|
|||
---
|
||||
layout: default
|
||||
title: WMS map server
|
||||
nav_order: 5
|
||||
---
|
||||
|
||||
# Configure WMS map server
|
||||
|
||||
OpenSearch Dashboards includes default map tiles, but if you need more specialized maps, you can configure OpenSearch Dashboards to use a WMS map server:
|
||||
|
||||
1. Open OpenSearch Dashboards at `https://<host>:<port>`. For example, [https://localhost:5601](https://localhost:5601).
|
||||
1. If necessary, log in.
|
||||
1. Choose **Management** and **Advanced Settings**.
|
||||
1. Locate `visualization:tileMap:WMSdefaults`.
|
||||
1. Change `enabled` to true and add the URL of a valid WMS map server:
|
||||
|
||||
```json
|
||||
{
|
||||
"enabled": true,
|
||||
"url": "<wms-map-server-url>",
|
||||
"options": {
|
||||
"format": "image/png",
|
||||
"transparent": true
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Map services often have licensing fees or restrictions. You're responsible for all such considerations on any map server that you specify.
|
||||
{: .note }
|
|
@ -0,0 +1,63 @@
|
|||
---
|
||||
layout: default
|
||||
title: Notebooks (experimental)
|
||||
nav_order: 50
|
||||
redirect_from: /docs/notebooks/
|
||||
---
|
||||
|
||||
# OpenSearch Dashboards notebooks (experimental)
|
||||
|
||||
Notebooks have a known issue with [tenants](../../security/access-control/multi-tenancy/). If you open a notebook and can't see its visualizations, you might be under the wrong tenant, or you might not have access to the tenant at all.
|
||||
{: .warning }
|
||||
|
||||
An OpenSearch Dashboards notebook is an interface that lets you easily combine live visualizations and narrative text in a single notebook interface.
|
||||
|
||||
Notebooks let you interactively explore data by running different visualizations that you can share with team members to collaborate on a project.
|
||||
|
||||
A notebook is a document composed of two elements: OpenSearch Dashboards visualizations and paragraphs (Markdown). Choose multiple timelines to compare and contrast visualizations.
|
||||
|
||||
Common use cases include creating postmortem reports, designing runbooks, building live infrastructure reports, and writing documentation.
|
||||
|
||||
|
||||
## Get started with notebooks
|
||||
|
||||
To get started, choose **OpenSearch Dashboards Notebooks** within OpenSearch Dashboards.
|
||||
|
||||
|
||||
### Step 1: Create a notebook
|
||||
|
||||
A notebook is an interface for creating reports.
|
||||
|
||||
1. Choose **Create notebook** and enter a descriptive name.
|
||||
1. Choose **Create**.
|
||||
|
||||
Choose **Notebook actions** to rename, duplicate, or delete a notebook.
|
||||
|
||||
|
||||
### Step 2: Add a paragraph
|
||||
|
||||
Paragraphs combine text and visualizations for describing data.
|
||||
|
||||
|
||||
#### Add a markdown paragraph
|
||||
|
||||
1. To add text, choose **Add markdown paragraph**.
|
||||
1. Add rich text with markdown syntax.
|
||||
|
||||
![Markdown paragraph](../../images/markdown-notebook.png)
|
||||
|
||||
|
||||
#### Add a visualization paragraph
|
||||
|
||||
1. To add a visualization, choose **Add OpenSearch Dashboards visualization paragraph**.
|
||||
1. In **Title**, select your visualization and choose a date range. You can choose multiple timelines to compare and contrast visualizations.
|
||||
1. To run and save a paragraph, choose **Run**.
|
||||
|
||||
You can perform the following actions on paragraphs:
|
||||
|
||||
- Add a new paragraph to the top of a report.
|
||||
- Add a new paragraph to the bottom of a report.
|
||||
- Run all the paragraphs at the same time.
|
||||
- Clear the outputs of all paragraphs.
|
||||
- Delete all the paragraphs.
|
||||
- Move paragraphs up and down.
|
|
@ -0,0 +1,54 @@
|
|||
---
|
||||
layout: default
|
||||
title: Reporting
|
||||
nav_order: 20
|
||||
---
|
||||
|
||||
|
||||
# Reporting
|
||||
|
||||
You can use OpenSearch Dashboards to create PNG, PDF, and CSV reports. To create reports, you must have the correct permissions. For a summary of the predefined roles and the permissions they grant, see the [security plugin](../../security/access-control/users-roles/#predefined-roles).
|
||||
|
||||
|
||||
## Create reports from Discover, Visualize, or Dashboard
|
||||
|
||||
Quickly generate an on-demand report from the current view.
|
||||
|
||||
1. From the top menu bar, choose **Reporting**.
|
||||
1. For dashboards or visualizations, choose **Download PDF** or **Download PNG**. From the Discover page, choose **Download CSV**.
|
||||
|
||||
Reports generate asynchronously in the background and might take a few minutes, depending on the size of the report. A notification appears when your report is ready to download.
|
||||
|
||||
1. To create a schedule-based report, choose **Create report definition**. Then proceed to [Create reports using a definition](#create-reports-using-a-definition). This option pre-fills many of the fields for you based on the visualization, dashboard, or data you were viewing.
|
||||
|
||||
|
||||
## Create reports using a definition
|
||||
|
||||
Definitions let you generate reports on a periodic schedule.
|
||||
|
||||
1. From the navigation panel, choose **Reporting**.
|
||||
1. Choose **Create**.
|
||||
1. Under **Report settings**, enter a name and optional description for your report.
|
||||
1. Choose the **Report Source** (i.e. the page from which the report is generated). You can generate reports from the **Dashboard**, **Visualize**, or **Discover** pages.
|
||||
1. Select your dashboard, visualization, or saved search. Then choose a time range for the report.
|
||||
1. Choose an appropriate file format for the report.
|
||||
1. (Optional) Add a header or footer to the report. Headers and footers are only available for dashboard or visualization reports.
|
||||
1. Under **Report trigger**, choose either **On-demand** or **Schedule**.
|
||||
|
||||
For scheduled reports, select either **Recurring** or **Cron based**. You can receive reports daily or at some other time interval. Cron expressions give you even more flexibility. See [Cron expression reference](../../alerting/cron/) for more information.
|
||||
|
||||
1. Choose **Create**.
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Chromium fails to launch with OpenSearch Dashboards
|
||||
|
||||
While creating a report for dashboards or visualizations, you might see the following error:
|
||||
|
||||
![OpenSearch Dashboards reporting pop-up error message](../../images/reporting-error.png)
|
||||
|
||||
This problem can occur for two reasons:
|
||||
|
||||
- You don't have the correct version of `headless-chrome` to match the operating system on which OpenSearch Dashboards is running. Download the correct version [here](https://github.com/opensearch-project/dashboards-reports/releases/tag/chromium-1.12.0.0).
|
||||
|
||||
- You're missing additional dependencies. Install the required dependencies for your operating system from the [additional libraries](https://github.com/opensearch-project/dashboards-reports/blob/main/dashboards-reports/rendering-engine/headless-chrome/README.md#additional-libaries) section.
|
|
@ -0,0 +1,160 @@
|
|||
---
|
||||
layout: default
|
||||
title: Aggregations
|
||||
nav_order: 13
|
||||
has_children: true
|
||||
---
|
||||
|
||||
# Aggregations
|
||||
|
||||
OpenSearch isn’t just for search. Aggregations let you tap into OpenSearch's powerful analytics engine to analyze your data and extract statistics from it.
|
||||
|
||||
Use cases for aggregations range from analyzing data in real time so that you can take action on it to using OpenSearch Dashboards to create a visualization dashboard.
|
||||
|
||||
OpenSearch can perform aggregations on massive datasets in milliseconds. Compared to queries, aggregations consume more CPU cycles and memory.
|
||||
|
||||
## Aggregations on text fields
|
||||
|
||||
By default, OpenSearch doesn't support aggregations on a text field.
|
||||
Because text fields are tokenized, an aggregation on a text field has to reverse the tokenization process back to its original string and then formulate an aggregation based on that. Such an operation consumes significant memory and degrades cluster performance.
|
||||
|
||||
While you can enable aggregations on text fields by setting the `fielddata` parameter to `true` in the mapping, the aggregations are still based on the tokenized words and not on the raw text.
|
||||
|
||||
We recommend keeping a raw version of the text field as a `keyword` field that you can aggregate on.
|
||||
In this case, you can perform aggregations on the `title.raw` field, instead of the `title` field:
|
||||
|
||||
```json
|
||||
PUT movies
|
||||
{
|
||||
"mappings": {
|
||||
"properties": {
|
||||
"title": {
|
||||
"type": "text",
|
||||
"fielddata": true,
|
||||
"fields": {
|
||||
"raw": {
|
||||
"type": "keyword"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## General aggregation structure
|
||||
|
||||
The structure of an aggregation query is as follows:
|
||||
|
||||
```json
|
||||
GET _search
|
||||
{
|
||||
"size": 0,
|
||||
"aggs": {
|
||||
"NAME": {
|
||||
"AGG_TYPE": {}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
If you’re only interested in the aggregation result and not in the results of the query, set `size` to 0.
|
||||
|
||||
In the `aggs` property (you can use `aggregations` if you want), you can define any number of aggregations.
|
||||
Each aggregation is defined by its name and one of the types of aggregations that OpenSearch supports.
|
||||
|
||||
The name of the aggregation helps you to distinguish between different aggregations in the response.
|
||||
The `AGG_TYPE` property is where you specify the type of aggregation.
|
||||
|
||||
## Sample aggregation
|
||||
|
||||
This section uses the OpenSearch Dashboards sample e-commerce data and web log data. To add the sample data, log in to OpenSearch Dashboards, choose **Home** and **Try our sample data**. For **Sample eCommerce orders** and **Sample web logs**, choose **Add data**.
|
||||
|
||||
### avg
|
||||
|
||||
To find the average value of the `taxful_total_price` field:
|
||||
|
||||
```json
|
||||
GET opensearch_dashboards_sample_data_ecommerce/_search
|
||||
{
|
||||
"size": 0,
|
||||
"aggs": {
|
||||
"avg_taxful_total_price": {
|
||||
"avg": {
|
||||
"field": "taxful_total_price"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
#### Sample response
|
||||
|
||||
```json
|
||||
{
|
||||
"took" : 1,
|
||||
"timed_out" : false,
|
||||
"_shards" : {
|
||||
"total" : 1,
|
||||
"successful" : 1,
|
||||
"skipped" : 0,
|
||||
"failed" : 0
|
||||
},
|
||||
"hits" : {
|
||||
"total" : {
|
||||
"value" : 4675,
|
||||
"relation" : "eq"
|
||||
},
|
||||
"max_score" : null,
|
||||
"hits" : [ ]
|
||||
},
|
||||
"aggregations" : {
|
||||
"avg_taxful_total_price" : {
|
||||
"value" : 75.05542864304813
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
The aggregation block in the response shows the average value for the `taxful_total_price` field.
|
||||
|
||||
## Types of aggregations
|
||||
|
||||
There are three main types of aggregations:
|
||||
|
||||
- Metric aggregations - Calculate metrics such as `sum`, `min`, `max`, and `avg` on numeric fields.
|
||||
- Bucket aggregations - Sort query results into groups based on some criteria.
|
||||
- Pipeline aggregations - Pipe the output of one aggregation as an input to another.
|
||||
|
||||
## Nested aggregations
|
||||
|
||||
Aggregations within aggregations are called nested or sub aggregations.
|
||||
|
||||
Metric aggregations produce simple results and can't contain nested aggregations.
|
||||
|
||||
Bucket aggregations produce buckets of documents that you can nest in other aggregations. You can perform complex analysis on your data by nesting metric and bucket aggregations within bucket aggregations.
|
||||
|
||||
### General nested aggregation syntax
|
||||
|
||||
```json
|
||||
{
|
||||
"aggs": {
|
||||
"name": {
|
||||
"type": {
|
||||
"data"
|
||||
},
|
||||
"aggs": {
|
||||
"nested": {
|
||||
"type": {
|
||||
"data"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
The inner `aggs` keyword begins a new nested aggregation. The syntax of the parent aggregation and the nested aggregation is the same. Nested aggregations run in the context of the preceding parent aggregations.
|
||||
|
||||
You can also pair your aggregations with search queries to narrow down things you’re trying to analyze before aggregating. If you don't add a query, OpenSearch implicitly uses the `match_all` query.
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,256 @@
|
|||
---
|
||||
layout: default
|
||||
title: CAT API
|
||||
nav_order: 20
|
||||
---
|
||||
|
||||
# cat API
|
||||
|
||||
You can get essential statistics about your cluster in an easy-to-understand, tabular format using the compact and aligned text (CAT) API. The cat API is a human-readable interface that returns plain text instead of traditional JSON.
|
||||
|
||||
Using the cat API, you can answer questions like which node is the elected master, what state is the cluster in, how many documents are in each index, and so on.
|
||||
|
||||
To see the available operations in the cat API, use the following command:
|
||||
|
||||
```
|
||||
GET _cat
|
||||
```
|
||||
|
||||
You can also use the following string parameters with your query.
|
||||
|
||||
Parameter | Description
|
||||
:--- | :--- |
|
||||
`?v` | Makes the output more verbose by adding headers to the columns. It also adds some formatting to help align each of the columns together. All examples on this page include the `v` parameter.
|
||||
`?help` | Lists the default and other available headers for a given operation.
|
||||
`?h` | Limits the output to specific headers.
|
||||
`?format` | Outputs the result in JSON, YAML, or CBOR formats.
|
||||
`?sort` | Sorts the output by the specified columns.
|
||||
|
||||
To see what each column represents, use the `?v` parameter:
|
||||
|
||||
```
|
||||
GET _cat/<operation_name>?v
|
||||
```
|
||||
|
||||
To see all the available headers, use the `?help` parameter:
|
||||
|
||||
```
|
||||
GET _cat/<operation_name>?help
|
||||
```
|
||||
|
||||
To limit the output to a subset of headers, use the `?h` parameter:
|
||||
|
||||
```
|
||||
GET _cat/<operation_name>?h=<header_name_1>,<header_name_2>&v
|
||||
```
|
||||
|
||||
Typically, for any operation you can find out what headers are available using the `?help` parameter, and then use the `?h` parameter to limit the output to only the headers that you care about.
|
||||
|
||||
---
|
||||
|
||||
#### Table of contents
|
||||
1. TOC
|
||||
{:toc}
|
||||
|
||||
---
|
||||
## Aliases
|
||||
|
||||
Lists the mapping of aliases to indices, plus routing and filtering information.
|
||||
|
||||
```
|
||||
GET _cat/aliases?v
|
||||
```
|
||||
|
||||
To limit the information to a specific alias, add the alias name after your query.
|
||||
|
||||
```
|
||||
GET _cat/aliases/<alias>?v
|
||||
```
|
||||
|
||||
## Allocation
|
||||
|
||||
Lists the allocation of disk space for indices and the number of shards on each node.
|
||||
Default request:
|
||||
```
|
||||
GET _cat/allocation?v
|
||||
```
|
||||
|
||||
## Count
|
||||
|
||||
Lists the number of documents in your cluster.
|
||||
|
||||
```
|
||||
GET _cat/count?v
|
||||
```
|
||||
|
||||
To see the number of documents in a specific index, add the index name after your query.
|
||||
|
||||
```
|
||||
GET _cat/count/<index>?v
|
||||
```
|
||||
|
||||
## Field data
|
||||
|
||||
Lists the memory size used by each field per node.
|
||||
|
||||
```
|
||||
GET _cat/fielddata?v
|
||||
```
|
||||
|
||||
To limit the information to a specific field, add the field name after your query.
|
||||
|
||||
```
|
||||
GET _cat/fielddata/<fields>?v
|
||||
```
|
||||
|
||||
## Health
|
||||
|
||||
Lists the status of the cluster, how long the cluster has been up, the number of nodes, and other useful information that helps you analyze the health of your cluster.
|
||||
|
||||
```
|
||||
GET _cat/health?v
|
||||
```
|
||||
|
||||
## Indices
|
||||
|
||||
Lists information related to indices—how much disk space they are using, how many shards they have, their health status, and so on.
|
||||
|
||||
```
|
||||
GET _cat/indices?v
|
||||
```
|
||||
|
||||
To limit the information to a specific index, add the index name after your query.
|
||||
|
||||
```
|
||||
GET _cat/indices/<index>?v
|
||||
```
|
||||
|
||||
## Master
|
||||
|
||||
Lists information that helps identify the elected master node.
|
||||
|
||||
```
|
||||
GET _cat/master?v
|
||||
```
|
||||
|
||||
## Node attributes
|
||||
|
||||
Lists the attributes of custom nodes.
|
||||
|
||||
```
|
||||
GET _cat/nodeattrs?v
|
||||
```
|
||||
|
||||
## Nodes
|
||||
|
||||
Lists node-level information, including node roles and load metrics.
|
||||
|
||||
A few important node metrics are `pid`, `name`, `master`, `ip`, `port`, `version`, `build`, `jdk`, along with `disk`, `heap`, `ram`, and `file_desc`.
|
||||
|
||||
```
|
||||
GET _cat/nodes?v
|
||||
```
|
||||
|
||||
## Pending tasks
|
||||
|
||||
Lists the progress of all pending tasks, including task priority and time in queue.
|
||||
|
||||
```
|
||||
GET _cat/pending_tasks?v
|
||||
```
|
||||
|
||||
## Plugins
|
||||
|
||||
Lists the names, components, and versions of the installed plugins.
|
||||
|
||||
```
|
||||
GET _cat/plugins?v
|
||||
```
|
||||
|
||||
## Recovery
|
||||
|
||||
Lists all completed and ongoing index and shard recoveries.
|
||||
|
||||
```
|
||||
GET _cat/recovery?v
|
||||
```
|
||||
|
||||
To see only the recoveries of a specific index, add the index name after your query.
|
||||
|
||||
```
|
||||
GET _cat/recovery/<index>?v
|
||||
```
|
||||
|
||||
## Repositories
|
||||
|
||||
Lists all snapshot repositories and their types.
|
||||
|
||||
```
|
||||
GET _cat/repositories?v
|
||||
```
|
||||
|
||||
## Segments
|
||||
|
||||
Lists Lucene segment-level information for each index.
|
||||
|
||||
```
|
||||
GET _cat/segments?v
|
||||
```
|
||||
|
||||
To see only the information about segments of a specific index, add the index name after your query.
|
||||
|
||||
```
|
||||
GET _cat/segments/<index>?v
|
||||
```
|
||||
|
||||
## Shards
|
||||
|
||||
Lists the state of all primary and replica shards and how they are distributed.
|
||||
|
||||
```
|
||||
GET _cat/shards?v
|
||||
```
|
||||
|
||||
To see only the information about shards of a specific index, add the index name after your query.
|
||||
|
||||
```
|
||||
GET _cat/shards/<index>?v
|
||||
```
|
||||
|
||||
## Snapshots
|
||||
|
||||
Lists all snapshots for a repository.
|
||||
|
||||
```
|
||||
GET _cat/snapshots/<repository>?v
|
||||
```
|
||||
|
||||
## Tasks
|
||||
|
||||
Lists the progress of all tasks currently running on your cluster.
|
||||
|
||||
```
|
||||
GET _cat/tasks?v
|
||||
```
|
||||
|
||||
## Templates
|
||||
|
||||
Lists the names, patterns, order numbers, and version numbers of index templates.
|
||||
|
||||
```
|
||||
GET _cat/templates?v
|
||||
```
|
||||
|
||||
## Thread pool
|
||||
|
||||
Lists the active, queued, and rejected threads of different thread pools on each node.
|
||||
|
||||
```
|
||||
GET _cat/thread_pool?v
|
||||
```
|
||||
|
||||
To limit the information to a specific thread pool, add the thread pool name after your query.
|
||||
|
||||
```
|
||||
GET _cat/thread_pool/<thread_pool>?v
|
||||
```
|
|
@ -0,0 +1,338 @@
|
|||
---
|
||||
layout: default
|
||||
title: Cluster formation
|
||||
nav_order: 7
|
||||
---
|
||||
|
||||
# Cluster formation
|
||||
|
||||
Before diving into OpenSearch and searching and aggregating data, you first need to create an OpenSearch cluster.
|
||||
|
||||
OpenSearch can operate as a single-node or multi-node cluster. The steps to configure both are, in general, quite similar. This page demonstrates how to create and configure a multi-node cluster, but with only a few minor adjustments, you can follow the same steps to create a single-node cluster.
|
||||
|
||||
To create and deploy an OpenSearch cluster according to your requirements, it’s important to understand how node discovery and cluster formation work and what settings govern them.
|
||||
|
||||
There are many ways to design a cluster. The following illustration shows a basic architecture:
|
||||
|
||||
![multi-node cluster architecture diagram](../../images/cluster.png)
|
||||
|
||||
This is a four-node cluster that has one dedicated master node, one dedicated coordinating node, and two data nodes that are master-eligible and also used for ingesting data.
|
||||
|
||||
The following table provides brief descriptions of the node types:
|
||||
|
||||
Node type | Description | Best practices for production
|
||||
:--- | :--- | :-- |
|
||||
`Master` | Manages the overall operation of a cluster and keeps track of the cluster state. This includes creating and deleting indices, keeping track of the nodes that join and leave the cluster, checking the health of each node in the cluster (by running ping requests), and allocating shards to nodes. | Three dedicated master nodes in three different zones is the right approach for almost all production use cases. This configuration ensures your cluster never loses quorum. Two nodes will be idle for most of the time except when one node goes down or needs some maintenance.
|
||||
`Master-eligible` | Elects one node among them as the master node through a voting process. | For production clusters, make sure you have dedicated master nodes. The way to achieve a dedicated node type is to mark all other node types as false. In this case, you have to mark all the other nodes as not master-eligible.
|
||||
`Data` | Stores and searches data. Performs all data-related operations (indexing, searching, aggregating) on local shards. These are the worker nodes of your cluster and need more disk space than any other node type. | As you add data nodes, keep them balanced between zones. For example, if you have three zones, add data nodes in multiples of three, one for each zone. We recommend using storage and RAM-heavy nodes.
|
||||
`Ingest` | Preprocesses data before storing it in the cluster. Runs an ingest pipeline that transforms your data before adding it to an index. | If you plan to ingest a lot of data and run complex ingest pipelines, we recommend you use dedicated ingest nodes. You can also optionally offload your indexing from the data nodes so that your data nodes are used exclusively for searching and aggregating.
|
||||
`Coordinating` | Delegates client requests to the shards on the data nodes, collects and aggregates the results into one final result, and sends this result back to the client. | A couple of dedicated coordinating-only nodes is appropriate to prevent bottlenecks for search-heavy workloads. We recommend using CPUs with as many cores as you can.
|
||||
|
||||
By default, each node is a master-eligible, data, ingest, and coordinating node. Deciding on the number of nodes, assigning node types, and choosing the hardware for each node type depends on your use case. You must take into account factors like the amount of time you want to hold on to your data, the average size of your documents, your typical workload (indexing, searches, aggregations), your expected price-performance ratio, your risk tolerance, and so on.
|
||||
|
||||
After you assess all these requirements, we recommend you use a benchmark testing tool like Rally to provision a small sample cluster and run tests with varying workloads and configurations. Compare and analyze the system and query metrics for these tests to design an optimum architecture. To get started with Rally, see the [Rally documentation](https://esrally.readthedocs.io/en/stable/).
|
||||
|
||||
This page demonstrates how to work with the different node types. It assumes that you have a four-node cluster similar to the preceding illustration.
|
||||
|
||||
## Prerequisites
|
||||
|
||||
Before you get started, you must install and configure OpenSearch on all of your nodes. For information about the available options, see [Install and configure OpenSearch](../../install/).
|
||||
|
||||
After you're done, use SSH to connect to each node, then open the `config/opensearch.yml` file. You can set all configurations for your cluster in this file.
|
||||
|
||||
## Step 1: Name a cluster
|
||||
|
||||
Specify a unique name for the cluster. If you don't specify a cluster name, it's set to `opensearch` by default. Setting a descriptive cluster name is important, especially if you want to run multiple clusters inside a single network.
|
||||
|
||||
To specify the cluster name, change the following line:
|
||||
|
||||
```yml
|
||||
#cluster.name: my-application
|
||||
```
|
||||
|
||||
to
|
||||
|
||||
```yml
|
||||
cluster.name: opensearch-cluster
|
||||
```
|
||||
|
||||
Make the same change on all the nodes to make sure that they'll join to form a cluster.
|
||||
|
||||
|
||||
## Step 2: Set node attributes for each node in a cluster
|
||||
|
||||
After you name the cluster, set node attributes for each node in your cluster.
|
||||
|
||||
|
||||
#### Master node
|
||||
|
||||
Give your master node a name. If you don't specify a name, OpenSearch assigns a machine-generated name that makes the node difficult to monitor and troubleshoot.
|
||||
|
||||
```yml
|
||||
node.name: opensearch-master
|
||||
```
|
||||
|
||||
You can also explicitly specify that this node is a master node. This is already true by default, but adding it makes it easier to identify the master node:
|
||||
|
||||
```yml
|
||||
node.master: true
|
||||
```
|
||||
|
||||
Then make the node a dedicated master that won’t perform double-duty as a data node:
|
||||
|
||||
```yml
|
||||
node.data: false
|
||||
```
|
||||
|
||||
Specify that this node will not be used for ingesting data:
|
||||
|
||||
```yml
|
||||
node.ingest: false
|
||||
```
|
||||
|
||||
#### Data nodes
|
||||
|
||||
Change the name of two nodes to `opensearch-d1` and `opensearch-d2`, respectively:
|
||||
|
||||
```yml
|
||||
node.name: opensearch-d1
|
||||
```
|
||||
```yml
|
||||
node.name: opensearch-d2
|
||||
```
|
||||
|
||||
You can make them master-eligible data nodes that will also be used for ingesting data:
|
||||
|
||||
```yml
|
||||
node.master: true
|
||||
node.data: true
|
||||
node.ingest: true
|
||||
```
|
||||
|
||||
You can also specify any other attributes that you'd like to set for the data nodes.
|
||||
|
||||
#### Coordinating node
|
||||
|
||||
Change the name of the coordinating node to `opensearch-c1`:
|
||||
|
||||
```yml
|
||||
node.name: opensearch-c1
|
||||
```
|
||||
|
||||
Every node is a coordinating node by default, so to make this node a dedicated coordinating node, set `node.master`, `node.data`, and `node.ingest` to `false`:
|
||||
|
||||
```yml
|
||||
node.master: false
|
||||
node.data: false
|
||||
node.ingest: false
|
||||
```
|
||||
|
||||
## Step 3: Bind a cluster to specific IP addresses
|
||||
|
||||
`network.host` defines the IP address used to bind the node. By default, OpenSearch listens on a local host, which limits the cluster to a single node. You can also use `_local_` and `_site_` to bind to any loopback or site-local address, whether IPv4 or IPv6:
|
||||
|
||||
```yml
|
||||
network.host: [_local_, _site_]
|
||||
```
|
||||
|
||||
To form a multi-node cluster, specify the IP address of the node:
|
||||
|
||||
```yml
|
||||
network.host: <IP address of the node>
|
||||
```
|
||||
|
||||
|
||||
Make sure to configure these settings on all of your nodes.
|
||||
|
||||
|
||||
## Step 4: Configure discovery hosts for a cluster
|
||||
|
||||
Now that you've configured the network hosts, you need to configure the discovery hosts.
|
||||
|
||||
Zen Discovery is the built-in, default mechanism that uses [unicast](https://en.wikipedia.org/wiki/Unicast) to find other nodes in the cluster.
|
||||
|
||||
You can generally just add all your master-eligible nodes to the `discovery.seed_hosts` array. When a node starts up, it finds the other master-eligible nodes, determines which one is the master, and asks to join the cluster.
|
||||
|
||||
For example, for `opensearch-master` the line looks something like this:
|
||||
|
||||
```yml
|
||||
discovery.seed_hosts: ["<private IP of opensearch-d1>", "<private IP of opensearch-d2>", "<private IP of opensearch-c1>"]
|
||||
```
|
||||
|
||||
|
||||
## Step 5: Start the cluster
|
||||
|
||||
After you set the configurations, start OpenSearch on all nodes:
|
||||
|
||||
```bash
|
||||
sudo systemctl start opensearch.service
|
||||
```
|
||||
|
||||
Then go to the log file to see the formation of the cluster:
|
||||
|
||||
```bash
|
||||
less /var/log/opensearch/opensearch-cluster.log
|
||||
```
|
||||
|
||||
Perform the following `_cat` query on any node to see all the nodes formed as a cluster:
|
||||
|
||||
```bash
|
||||
curl -XGET https://<private-ip>:9200/_cat/nodes?v -u 'admin:admin' --insecure
|
||||
```
|
||||
|
||||
```
|
||||
ip heap.percent ram.percent cpu load_1m load_5m load_15m node.role master name
|
||||
x.x.x.x 13 61 0 0.02 0.04 0.05 mi * opensearch-master
|
||||
x.x.x.x 16 60 0 0.06 0.05 0.05 md - opensearch-d1
|
||||
x.x.x.x 34 38 0 0.12 0.07 0.06 md - opensearch-d2
|
||||
x.x.x.x 23 38 0 0.12 0.07 0.06 md - opensearch-c1
|
||||
```
|
||||
|
||||
To better understand and monitor your cluster, use the [cat API](../catapis/).
|
||||
|
||||
|
||||
## (Advanced) Step 6: Configure shard allocation awareness or forced awareness
|
||||
|
||||
If your nodes are spread across several geographical zones, you can configure shard allocation awareness to allocate all replica shards to a zone that’s different from their primary shard.
|
||||
|
||||
With shard allocation awareness, if the nodes in one of your zones fail, you can be assured that your replica shards are spread across your other zones. It adds a layer of fault tolerance to ensure your data survives a zone failure beyond just individual node failures.
|
||||
|
||||
To configure shard allocation awareness, add zone attributes to `opensearch-d1` and `opensearch-d2`, respectively:
|
||||
|
||||
```yml
|
||||
node.attr.zone: zoneA
|
||||
```
|
||||
```yml
|
||||
node.attr.zone: zoneB
|
||||
```
|
||||
|
||||
Update the cluster settings:
|
||||
|
||||
```json
|
||||
PUT _cluster/settings
|
||||
{
|
||||
"persistent": {
|
||||
"cluster.routing.allocation.awareness.attributes": "zone"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
You can either use `persistent` or `transient` settings. We recommend the `persistent` setting because it persists through a cluster reboot. Transient settings don't persist through a cluster reboot.
|
||||
|
||||
Shard allocation awareness attempts to separate primary and replica shards across multiple zones. However, if only one zone is available (such as after a zone failure), OpenSearch allocates replica shards to the only remaining zone.
|
||||
|
||||
Another option is to require that primary and replica shards are never allocated to the same zone. This is called forced awareness.
|
||||
|
||||
To configure forced awareness, specify all the possible values for your zone attributes:
|
||||
|
||||
```json
|
||||
PUT _cluster/settings
|
||||
{
|
||||
"persistent": {
|
||||
"cluster.routing.allocation.awareness.attributes": "zone",
|
||||
"cluster.routing.allocation.awareness.force.zone.values":["zoneA", "zoneB"]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Now, if a data node fails, forced awareness doesn't allocate the replicas to a node in the same zone. Instead, the cluster enters a yellow state and only allocates the replicas when nodes in another zone come online.
|
||||
|
||||
In our two-zone architecture, we can use allocation awareness if `opensearch-d1` and `opensearch-d2` are less than 50% utilized, so that each of them has the storage capacity to allocate replicas in the same zone.
|
||||
If that is not the case, and `opensearch-d1` and `opensearch-d2` do not have the capacity to contain all primary and replica shards, we can use forced awareness. This approach helps to make sure that, in the event of a failure, OpenSearch doesn't overload your last remaining zone and lock up your cluster due to lack of storage.
|
||||
|
||||
Choosing allocation awareness or forced awareness depends on how much space you might need in each zone to balance your primary and replica shards.
|
||||
|
||||
|
||||
## (Advanced) Step 7: Set up a hot-warm architecture
|
||||
|
||||
You can design a hot-warm architecture where you first index your data to hot nodes---fast and expensive---and after a certain period of time move it to warm nodes---slow and cheap.
|
||||
|
||||
If you analyze time series data that you rarely update and want the older data to go onto cheaper storage, this architecture can be a good fit.
|
||||
|
||||
This architecture helps save money on storage costs. Rather than increasing the number of hot nodes and using fast, expensive storage, you can add warm nodes for data that you don't access as frequently.
|
||||
|
||||
To configure a hot-warm storage architecture, add `temp` attributes to `opensearch-d1` and `opensearch-d2`, respectively:
|
||||
|
||||
```yml
|
||||
node.attr.temp: hot
|
||||
```
|
||||
```yml
|
||||
node.attr.temp: warm
|
||||
```
|
||||
|
||||
You can set the attribute name and value to whatever you want as long as it’s consistent for all your hot and warm nodes.
|
||||
|
||||
To add an index `newindex` to the hot node:
|
||||
|
||||
```json
|
||||
PUT newindex
|
||||
{
|
||||
"settings": {
|
||||
"index.routing.allocation.require.temp": "hot"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Take a look at the following shard allocation for `newindex`:
|
||||
|
||||
```json
|
||||
GET _cat/shards/newindex?v
|
||||
index shard prirep state docs store ip node
|
||||
newindex 2 p STARTED 0 230b 10.0.0.225 opensearch-d1
|
||||
newindex 2 r UNASSIGNED
|
||||
newindex 3 p STARTED 0 230b 10.0.0.225 opensearch-d1
|
||||
newindex 3 r UNASSIGNED
|
||||
newindex 4 p STARTED 0 230b 10.0.0.225 opensearch-d1
|
||||
newindex 4 r UNASSIGNED
|
||||
newindex 1 p STARTED 0 230b 10.0.0.225 opensearch-d1
|
||||
newindex 1 r UNASSIGNED
|
||||
newindex 0 p STARTED 0 230b 10.0.0.225 opensearch-d1
|
||||
newindex 0 r UNASSIGNED
|
||||
```
|
||||
|
||||
In this example, all primary shards are allocated to `opensearch-d1`, which is our hot node. All replica shards are unassigned, because we're forcing this index to allocate only to hot nodes.
|
||||
|
||||
To add an index `oldindex` to the warm node:
|
||||
|
||||
```json
|
||||
PUT oldindex
|
||||
{
|
||||
"settings": {
|
||||
"index.routing.allocation.require.temp": "warm"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
The shard allocation for `oldindex`:
|
||||
|
||||
```json
|
||||
GET _cat/shards/oldindex?v
|
||||
index shard prirep state docs store ip node
|
||||
oldindex 2 p STARTED 0 230b 10.0.0.74 opensearch-d2
|
||||
oldindex 2 r UNASSIGNED
|
||||
oldindex 3 p STARTED 0 230b 10.0.0.74 opensearch-d2
|
||||
oldindex 3 r UNASSIGNED
|
||||
oldindex 4 p STARTED 0 230b 10.0.0.74 opensearch-d2
|
||||
oldindex 4 r UNASSIGNED
|
||||
oldindex 1 p STARTED 0 230b 10.0.0.74 opensearch-d2
|
||||
oldindex 1 r UNASSIGNED
|
||||
oldindex 0 p STARTED 0 230b 10.0.0.74 opensearch-d2
|
||||
oldindex 0 r UNASSIGNED
|
||||
```
|
||||
|
||||
In this case, all primary shards are allocated to `opensearch-d2`. Again, all replica shards are unassigned because we only have one warm node.
|
||||
|
||||
A popular approach is to configure your [index templates](../index-templates/) to set the `index.routing.allocation.require.temp` value to `hot`. This way, OpenSearch stores your most recent data on your hot nodes.
|
||||
|
||||
You can then use the [Index State Management (ISM)](../../ism/index/) plugin to periodically check the age of an index and specify actions to take on it. For example, when the index reaches a specific age, change the `index.routing.allocation.require.temp` setting to `warm` to automatically move your data from hot nodes to warm nodes.
|
||||
|
||||
|
||||
## Next steps
|
||||
|
||||
If you are using the security plugin, the previous request to `_cat/nodes?v` might have failed with an initialization error. To initialize the plugin, run `opensearch/plugins/opensearch-security/tools/securityadmin.sh`. A sample command that uses the demo certificates might look like this:
|
||||
|
||||
```bash
|
||||
sudo ./securityadmin.sh -cd ../securityconfig/ -icl -nhnv -cacert /etc/opensearch/root-ca.pem -cert /etc/opensearch/kirk.pem -key /etc/opensearch/kirk-key.pem -h <private-ip>
|
||||
```
|
||||
|
||||
For full guidance around configuration options, see [Security configuration](../../security/configuration).
|
|
@ -0,0 +1,17 @@
|
|||
---
|
||||
layout: default
|
||||
title: Common REST Parameters
|
||||
nav_order: 93
|
||||
---
|
||||
|
||||
# Common REST parameters
|
||||
|
||||
OpenSearch supports the following parameters for all REST operations:
|
||||
|
||||
Option | Description | Example
|
||||
:--- | :--- | :---
|
||||
Human-readable output | To convert output units to human-readable values (for example, `1h` for 1 hour and `1kb` for 1,024 bytes), add `?human=true` to the request URL. | `GET <index_name>/_search?human=true`
|
||||
Pretty result | To get back JSON responses in a readable format, add `?pretty=true` to the request URL. | `GET <index_name>/_search?pretty=true`
|
||||
Content type | To specify the type of content in the request body, use the `Content-Type` key name in the request header. Most operations support JSON, YAML, and CBOR formats. | `POST _scripts/<template_name> -H 'Content-Type: application/json'`
|
||||
Request body in query string | If the client library does not accept a request body for non-POST requests, use the `source` query string parameter to pass the request body. Also, specify the `source_content_type` parameter with a supported media type such as `application/json`. | `GET _search?source_content_type=application/json&source={"query":{"match_all":{}}}`
|
||||
Stack traces | To include the error stack trace in the response when an exception is raised, add `error_trace=true` to the request URL. | `GET <index_name>/_search?error_trace=true`
|
|
@ -0,0 +1,68 @@
|
|||
---
|
||||
layout: default
|
||||
title: Configuration
|
||||
nav_order: 5
|
||||
---
|
||||
|
||||
# OpenSearch configuration
|
||||
|
||||
Most OpenSearch configuration can take place in the cluster settings API. Certain operations require you to modify `opensearch.yml` and restart the cluster.
|
||||
|
||||
Whenever possible, use the cluster settings API instead; `opensearch.yml` is local to each node, whereas the API applies the setting to all nodes in the cluster.
|
||||
|
||||
|
||||
## Cluster settings API
|
||||
|
||||
The first step in changing a setting is to view the current settings:
|
||||
|
||||
```
|
||||
GET _cluster/settings?include_defaults=true
|
||||
```
|
||||
|
||||
For a more concise summary of non-default settings:
|
||||
|
||||
```
|
||||
GET _cluster/settings
|
||||
```
|
||||
|
||||
Three categories of setting exist in the cluster settings API: persistent, transient, and default. Persistent settings, well, persist after a cluster restart. After a restart, OpenSearch clears transient settings.
|
||||
|
||||
If you specify the same setting in multiple places, OpenSearch uses the following precedence:
|
||||
|
||||
1. Transient settings
|
||||
2. Persistent settings
|
||||
3. Settings from `opensearch.yml`
|
||||
4. Default settings
|
||||
|
||||
To change a setting, just specify the new one as either persistent or transient. This example shows the flat settings form:
|
||||
|
||||
```json
|
||||
PUT /_cluster/settings
|
||||
{
|
||||
"persistent" : {
|
||||
"action.auto_create_index" : false
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
You can also use the expanded form, which lets you copy and paste from the GET response and change existing values:
|
||||
|
||||
```json
|
||||
PUT /_cluster/settings
|
||||
{
|
||||
"persistent": {
|
||||
"action": {
|
||||
"auto_create_index": false
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
---
|
||||
|
||||
## Configuration file
|
||||
|
||||
You can find `opensearch.yml` in `/usr/share/opensearch/config/opensearch.yml` (Docker) or `/etc/opensearch/opensearch.yml` (RPM and DEB) on each node.
|
||||
|
||||
The demo configuration includes a number of settings for the security plugin that you should modify before using OpenSearch for a production workload. To learn more, see [Security](../../security/).
|
|
@ -0,0 +1,202 @@
|
|||
---
|
||||
layout: default
|
||||
title: Index aliases
|
||||
nav_order: 12
|
||||
---
|
||||
|
||||
# Index aliases
|
||||
|
||||
An alias is a virtual index name that can point to one or more indices.
|
||||
|
||||
If your data is spread across multiple indices, rather than keeping track of which indices to query, you can create an alias and query it instead.
|
||||
|
||||
For example, if you’re storing logs into indices based on the month and you frequently query the logs for the previous two months, you can create a `last_2_months` alias and update the indices it points to each month.
|
||||
|
||||
Because you can change the indices an alias points to at any time, referring to indices using aliases in your applications allows you to reindex your data without any downtime.
|
||||
|
||||
---
|
||||
|
||||
#### Table of contents
|
||||
1. TOC
|
||||
{:toc}
|
||||
|
||||
|
||||
---
|
||||
|
||||
## Create aliases
|
||||
|
||||
To create an alias, use a POST request:
|
||||
|
||||
```json
|
||||
POST _aliases
|
||||
```
|
||||
|
||||
Use the `actions` method to specify the list of actions that you want to perform. This command creates an alias named `alias1` and adds `index-1` to this alias:
|
||||
|
||||
```json
|
||||
POST _aliases
|
||||
{
|
||||
"actions": [
|
||||
{
|
||||
"add": {
|
||||
"index": "index-1",
|
||||
"alias": "alias1"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
You should see the following response:
|
||||
|
||||
```json
|
||||
{
|
||||
"acknowledged": true
|
||||
}
|
||||
```
|
||||
|
||||
If this request fails, make sure the index that you're adding to the alias already exists.
|
||||
|
||||
To check if `alias1` refers to `index-1`, run the following command:
|
||||
|
||||
```json
|
||||
GET alias1
|
||||
```
|
||||
|
||||
## Add or remove indices
|
||||
|
||||
You can perform multiple actions in the same `_aliases` operation.
|
||||
For example, the following command removes `index-1` and adds `index-2` to `alias1`:
|
||||
|
||||
```json
|
||||
POST _aliases
|
||||
{
|
||||
"actions": [
|
||||
{
|
||||
"remove": {
|
||||
"index": "index-1",
|
||||
"alias": "alias1"
|
||||
}
|
||||
},
|
||||
{
|
||||
"add": {
|
||||
"index": "index-2",
|
||||
"alias": "alias1"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
The `add` and `remove` actions occur atomically, which means that at no point will `alias1` point to both `index-1` and `index-2`.
|
||||
|
||||
You can also add indices based on an index pattern:
|
||||
|
||||
```json
|
||||
POST _aliases
|
||||
{
|
||||
"actions": [
|
||||
{
|
||||
"add": {
|
||||
"index": "index*",
|
||||
"alias": "alias1"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
## Manage aliases
|
||||
|
||||
To list the mapping of aliases to indices, run the following command:
|
||||
|
||||
```json
|
||||
GET _cat/aliases?v
|
||||
```
|
||||
|
||||
#### Sample response
|
||||
|
||||
```json
|
||||
alias index filter routing.index routing.search
|
||||
alias1 index-1 * - -
|
||||
```
|
||||
|
||||
To check which indices an alias points to, run the following command:
|
||||
|
||||
```json
|
||||
GET _alias/alias1
|
||||
```
|
||||
|
||||
#### Sample response
|
||||
|
||||
```json
|
||||
{
|
||||
"index-2": {
|
||||
"aliases": {
|
||||
"alias1": {}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Conversely, to find which alias points to a specific index, run the following command:
|
||||
|
||||
```json
|
||||
GET /index-2/_alias/*
|
||||
```
|
||||
|
||||
To check if an alias exists, run the following command:
|
||||
|
||||
```json
|
||||
HEAD /alias1/_alias/
|
||||
```
|
||||
|
||||
## Add aliases at index creation
|
||||
|
||||
You can add an index to an alias as you create the index:
|
||||
|
||||
```json
|
||||
PUT index-1
|
||||
{
|
||||
"aliases": {
|
||||
"alias1": {}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Create filtered aliases
|
||||
|
||||
You can create a filtered alias to access a subset of documents or fields from the underlying indices.
|
||||
|
||||
This command adds only a specific timestamp field to `alias1`:
|
||||
|
||||
```json
|
||||
POST _aliases
|
||||
{
|
||||
"actions": [
|
||||
{
|
||||
"add": {
|
||||
"index": "index-1",
|
||||
"alias": "alias1",
|
||||
"filter": {
|
||||
"term": {
|
||||
"timestamp": "1574641891142"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
## Index alias options
|
||||
|
||||
You can specify the options shown in the following table.
|
||||
|
||||
Option | Valid values | Description | Required
|
||||
:--- | :--- | :--- | :---
|
||||
`index` | String | The name of the index that the alias points to. | Yes
|
||||
`alias` | String | The name of the alias. | No
|
||||
`filter` | Object | Add a filter to the alias. | No
|
||||
`routing` | String | Limit search to an associated shard value. You can specify `search_routing` and `index_routing` independently. | No
|
||||
`is_write_index` | Boolean | Specify the index that accepts any write operations to the alias. If this value is not specified, then no write operations are allowed. | No
|
|
@ -0,0 +1,271 @@
|
|||
---
|
||||
layout: default
|
||||
title: Index data
|
||||
nav_order: 10
|
||||
---
|
||||
|
||||
# Index data
|
||||
|
||||
You index data using the OpenSearch REST API. Two APIs exist: the index API and the `_bulk` API.
|
||||
|
||||
For situations in which new data arrives incrementally (for example, customer orders from a small business), you might use the index API to add documents individually as they arrive. For situations in which the flow of data is less frequent (for example, weekly updates to a marketing website), you might prefer to generate a file and send it to the `_bulk` API. For large numbers of documents, lumping requests together and using the `_bulk` API offers superior performance. If your documents are enormous, however, you might need to index them individually.
|
||||
|
||||
|
||||
## Introduction to indexing
|
||||
|
||||
Before you can search data, you must *index* it. Indexing is the method by which search engines organize data for fast retrieval. The resulting structure is called, fittingly, an index.
|
||||
|
||||
In OpenSearch, the basic unit of data is a JSON *document*. Within an index, OpenSearch identifies each document using a unique ID.
|
||||
|
||||
A request to the index API looks like this:
|
||||
|
||||
```json
|
||||
PUT <index>/_doc/<id>
|
||||
{ "A JSON": "document" }
|
||||
```
|
||||
|
||||
A request to the `_bulk` API looks a little different, because you specify the index and ID in the bulk data:
|
||||
|
||||
```json
|
||||
POST _bulk
|
||||
{ "index": { "_index": "<index>", "_id": "<id>" } }
|
||||
{ "A JSON": "document" }
|
||||
```
|
||||
|
||||
Bulk data must conform to a specific format, which requires a newline character (`\n`) at the end of every line, including the last line. This is the basic format:
|
||||
|
||||
```
|
||||
Action and metadata\n
|
||||
Optional document\n
|
||||
Action and metadata\n
|
||||
Optional document\n
|
||||
```
|
||||
|
||||
The document is optional, because `delete` actions don't require a document. The other actions (`index`, `create`, and `update`) all require a document. If you specifically want the action to fail if the document already exists, use the `create` action instead of the `index` action.
|
||||
{: .note }
|
||||
|
||||
To index bulk data using the `curl` command, navigate to the folder where you have your file saved and run the following command:
|
||||
|
||||
```json
|
||||
curl -H "Content-Type: application/x-ndjson" -X POST https://localhost:9200/data/_bulk -u 'admin:admin' --insecure --data-binary "@data.json"
|
||||
```
|
||||
|
||||
If any one of the actions in the `_bulk` API fails, OpenSearch continues to execute the other actions. Examine the `items` array in the response to figure out what went wrong. The entries in the `items` array are in the same order as the actions specified in the request.
|
||||
|
||||
OpenSearch automatically creates an index when you add a document to an index that doesn't already exist. It also automatically generates an ID if you don't specify an ID in the request. This simple example automatically creates the movies index, indexes the document, and assigns it a unique ID:
|
||||
|
||||
```json
|
||||
POST movies/_doc
|
||||
{ "title": "Spirited Away" }
|
||||
```
|
||||
|
||||
Automatic ID generation has a clear downside: because the indexing request didn't specify a document ID, you can't easily update the document at a later time. Also, if you run this request 10 times, OpenSearch indexes this document as 10 different documents with unique IDs. To specify an ID of 1, use the following request (note the use of PUT instead of POST):
|
||||
|
||||
```json
|
||||
PUT movies/_doc/1
|
||||
{ "title": "Spirited Away" }
|
||||
```
|
||||
|
||||
Because you must specify an ID, if you run this command 10 times, you still have just one document indexed with the `_version` field incremented to 10.
|
||||
|
||||
Indices default to one primary shard and one replica. If you want to specify non-default settings, create the index before adding documents:
|
||||
|
||||
```json
|
||||
PUT more-movies
|
||||
{ "settings": { "number_of_shards": 6, "number_of_replicas": 2 } }
|
||||
```
|
||||
|
||||
## Naming restrictions for indices
|
||||
|
||||
OpenSearch indices have the following naming restrictions:
|
||||
|
||||
- All letters must be lowercase.
|
||||
- Index names can't begin with underscores (`_`) or hyphens (`-`).
|
||||
- Index names can't contain spaces, commas, or the following characters:
|
||||
|
||||
`:`, `"`, `*`, `+`, `/`, `\`, `|`, `?`, `#`, `>`, or `<`
|
||||
|
||||
## Read data
|
||||
|
||||
After you index a document, you can retrieve it by sending a GET request to the same endpoint that you used for indexing:
|
||||
|
||||
```json
|
||||
GET movies/_doc/1
|
||||
|
||||
{
|
||||
"_index" : "movies",
|
||||
"_type" : "_doc",
|
||||
"_id" : "1",
|
||||
"_version" : 1,
|
||||
"_seq_no" : 0,
|
||||
"_primary_term" : 1,
|
||||
"found" : true,
|
||||
"_source" : {
|
||||
"title" : "Spirited Away"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
You can see the document in the `_source` object. If the document is not found, the `found` key is `false` and the `_source` object is not part of the response.
|
||||
|
||||
To retrieve multiple documents with a single command, use the `_mget` operation.
|
||||
The format for retrieving multiple documents is similar to the `_bulk` operation, where you must specify the index and ID in the request body:
|
||||
|
||||
```json
|
||||
GET _mget
|
||||
{
|
||||
"docs": [
|
||||
{
|
||||
"_index": "<index>",
|
||||
"_id": "<id>"
|
||||
},
|
||||
{
|
||||
"_index": "<index>",
|
||||
"_id": "<id>"
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
To only return specific fields in a document:
|
||||
|
||||
```json
|
||||
GET _mget
|
||||
{
|
||||
"docs": [
|
||||
{
|
||||
"_index": "<index>",
|
||||
"_id": "<id>",
|
||||
"_source": "field1"
|
||||
},
|
||||
{
|
||||
"_index": "<index>",
|
||||
"_id": "<id>",
|
||||
"_source": "field2"
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
To check if a document exists:
|
||||
|
||||
```json
|
||||
HEAD movies/_doc/<doc-id>
|
||||
```
|
||||
|
||||
If the document exists, you get back a `200 OK` response, and if it doesn't, you get back a `404 - Not Found` error.
|
||||
|
||||
## Update data
|
||||
|
||||
To update existing fields or to add new fields, send a POST request to the `_update` operation with your changes in a `doc` object:
|
||||
|
||||
```json
|
||||
POST movies/_update/1
|
||||
{
|
||||
"doc": {
|
||||
"title": "Castle in the Sky",
|
||||
"genre": ["Animation", "Fantasy"]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Note the updated `title` field and new `genre` field:
|
||||
|
||||
```json
|
||||
GET movies/_doc/1
|
||||
|
||||
{
|
||||
"_index" : "movies",
|
||||
"_type" : "_doc",
|
||||
"_id" : "1",
|
||||
"_version" : 2,
|
||||
"_seq_no" : 1,
|
||||
"_primary_term" : 1,
|
||||
"found" : true,
|
||||
"_source" : {
|
||||
"title" : "Castle in the Sky",
|
||||
"genre" : [
|
||||
"Animation",
|
||||
"Fantasy"
|
||||
]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
The document also has an incremented `_version` field. Use this field to keep track of how many times a document is updated.
|
||||
|
||||
POST requests make partial updates to documents. To altogether replace a document, use a PUT request:
|
||||
|
||||
```json
|
||||
PUT movies/_doc/1
|
||||
{
|
||||
"title": "Spirited Away"
|
||||
}
|
||||
```
|
||||
|
||||
The document with ID of 1 will contain only the `title` field, because the entire document will be replaced with the document indexed in this PUT request.
|
||||
|
||||
Use the `upsert` object to conditionally update documents based on whether they already exist. Here, if the document exists, its `title` field changes to `Castle in the Sky`. If it doesn't, OpenSearch indexes the document in the `upsert` object.
|
||||
|
||||
```json
|
||||
POST movies/_update/2
|
||||
{
|
||||
"doc": {
|
||||
"title": "Castle in the Sky"
|
||||
},
|
||||
"upsert": {
|
||||
"title": "Only Yesterday",
|
||||
"genre": ["Animation", "Fantasy"],
|
||||
"date": 1993
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
#### Sample response
|
||||
|
||||
```json
|
||||
{
|
||||
"_index" : "movies",
|
||||
"_type" : "_doc",
|
||||
"_id" : "2",
|
||||
"_version" : 2,
|
||||
"result" : "updated",
|
||||
"_shards" : {
|
||||
"total" : 2,
|
||||
"successful" : 1,
|
||||
"failed" : 0
|
||||
},
|
||||
"_seq_no" : 3,
|
||||
"_primary_term" : 1
|
||||
}
|
||||
```
|
||||
|
||||
Each update operation for a document has a unique combination of the `_seq_no` and `_primary_term` values.
|
||||
|
||||
OpenSearch first writes your updates to the primary shard and then sends this change to all the replica shards. An uncommon issue can occur if multiple users of your OpenSearch-based application make updates to existing documents in the same index. In this situation, another user can read and update a document from a replica before it receives your update from the primary shard. Your update operation then ends up updating an older version of the document. In the best case, you and the other user make the same changes, and the document remains accurate. In the worst case, the document now contains out-of-date information.
|
||||
|
||||
To prevent this situation, pass the `_seq_no` and `_primary_term` values as the `if_seq_no` and `if_primary_term` query parameters in the request URL:
|
||||
|
||||
```json
|
||||
POST movies/_update/2?if_seq_no=3&if_primary_term=1
|
||||
{
|
||||
"doc": {
|
||||
"title": "Castle in the Sky",
|
||||
"genre": ["Animation", "Fantasy"]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
If the document is updated after we retrieved it, the `_seq_no` and `_primary_term` values are different and our update operation fails with a `409 — Conflict` error.
|
||||
|
||||
When using the `_bulk` API, specify the `_seq_no` and `_primary_term` values within the action metadata.
|
||||
|
||||
## Delete data
|
||||
|
||||
To delete a document from an index, use a DELETE request:
|
||||
|
||||
```json
|
||||
DELETE movies/_doc/1
|
||||
```
|
||||
|
||||
The DELETE operation increments the `_version` field. If you add the document back to the same ID, the `_version` field increments again. This behavior occurs because OpenSearch deletes the document `_source`, but retains its metadata.
|
|
@ -0,0 +1,202 @@
|
|||
---
|
||||
layout: default
|
||||
title: Index templates
|
||||
nav_order: 14
|
||||
---
|
||||
|
||||
# Index templates
|
||||
|
||||
Index templates let you initialize new indices with predefined mappings and settings. For example, if you continuously index log data, you can define an index template so that all of these indices have the same number of shards and replicas.
|
||||
|
||||
---
|
||||
|
||||
#### Table of contents
|
||||
1. TOC
|
||||
{:toc}
|
||||
|
||||
|
||||
---
|
||||
|
||||
## Create a template
|
||||
|
||||
To create an index template, use a POST request:
|
||||
|
||||
```json
|
||||
POST _index_template
|
||||
```
|
||||
|
||||
This command creates a template named `daily_logs` and applies it to any new index whose name matches the pattern `logs-2020-01-*` and also adds it to the `my_logs` alias:
|
||||
|
||||
```json
|
||||
PUT _index_template/daily_logs
|
||||
{
|
||||
"index_patterns": [
|
||||
"logs-2020-01-*"
|
||||
],
|
||||
"template": {
|
||||
"aliases": {
|
||||
"my_logs": {}
|
||||
},
|
||||
"settings": {
|
||||
"number_of_shards": 2,
|
||||
"number_of_replicas": 1
|
||||
},
|
||||
"mappings": {
|
||||
"properties": {
|
||||
"timestamp": {
|
||||
"type": "date",
|
||||
"format": "yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||epoch_millis"
|
||||
},
|
||||
"value": {
|
||||
"type": "double"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
You should see the following response:
|
||||
|
||||
```json
|
||||
{
|
||||
"acknowledged": true
|
||||
}
|
||||
```
|
||||
|
||||
If you create an index named `logs-2020-01-01`, you can see that it has the mappings and settings from the template:
|
||||
|
||||
```json
|
||||
PUT logs-2020-01-01
|
||||
GET logs-2020-01-01
|
||||
```
|
||||
|
||||
```json
|
||||
{
|
||||
"logs-2020-01-01": {
|
||||
"aliases": {
|
||||
"my_logs": {}
|
||||
},
|
||||
"mappings": {
|
||||
"properties": {
|
||||
"timestamp": {
|
||||
"type": "date",
|
||||
"format": "yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||epoch_millis"
|
||||
},
|
||||
"value": {
|
||||
"type": "double"
|
||||
}
|
||||
}
|
||||
},
|
||||
"settings": {
|
||||
"index": {
|
||||
"creation_date": "1578107970779",
|
||||
"number_of_shards": "2",
|
||||
"number_of_replicas": "1",
|
||||
"uuid": "U1vMDMOHSAuS2IzPcPHpOA",
|
||||
"version": {
|
||||
"created": "7010199"
|
||||
},
|
||||
"provided_name": "logs-2020-01-01"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Any additional indices that match this pattern---`logs-2020-01-02`, `logs-2020-01-03`, and so on---will inherit the same mappings and settings.
|
||||
|
||||
## Retrieve a template
|
||||
|
||||
To list all index templates:
|
||||
|
||||
```json
|
||||
GET _cat/templates
|
||||
```
|
||||
|
||||
To find a template by its name:
|
||||
|
||||
```json
|
||||
GET _index_template/daily_logs
|
||||
```
|
||||
|
||||
To get a list of all your templates:
|
||||
|
||||
```json
|
||||
GET _index_template
|
||||
```
|
||||
|
||||
To get a list of all templates that match a pattern:
|
||||
|
||||
```json
|
||||
GET _index_template/daily*
|
||||
```
|
||||
|
||||
To check if a specific template exists:
|
||||
|
||||
```json
|
||||
HEAD _index_template/<name>
|
||||
```
|
||||
|
||||
## Configure multiple templates
|
||||
|
||||
You can create multiple index templates for your indices. If the index name matches more than one template, OpenSearch merges all mappings and settings from all matching templates and applies them to the index.
|
||||
|
||||
The settings from the more recently created index templates override the settings of older index templates. So, you can first define a few common settings in a generic template that can act as a catch-all and then add more specialized settings as required.
|
||||
|
||||
An even better approach is to explicitly specify template priority using the `priority` parameter. OpenSearch applies templates with lower priority numbers first and then overrides them with templates with higher priority numbers.
|
||||
|
||||
For example, say you have the following two templates that both match the `logs-2020-01-02` index and there’s a conflict in the `number_of_shards` field:
|
||||
|
||||
#### Template 1
|
||||
|
||||
```json
|
||||
PUT _index_template/template-01
|
||||
{
|
||||
"index_patterns": [
|
||||
"logs*"
|
||||
],
|
||||
"priority": 0,
|
||||
"template": {
|
||||
"settings": {
|
||||
"number_of_shards": 2
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
#### Template 2
|
||||
|
||||
```json
|
||||
PUT _index_template/template-02
|
||||
{
|
||||
"index_patterns": [
|
||||
"logs-2020-01-*"
|
||||
],
|
||||
"priority": 1,
|
||||
"template": {
|
||||
"settings": {
|
||||
"number_of_shards": 3
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Because `template-02` has a higher `priority` value, it takes precedence over `template-01`. The `logs-2020-01-02` index would have the `number_of_shards` value as 3.
|
||||
|
||||
## Delete a template
|
||||
|
||||
You can delete an index template using its name:
|
||||
|
||||
```json
|
||||
DELETE _index_template/daily_logs
|
||||
```
|
||||
|
||||
## Index template options
|
||||
|
||||
You can specify the following template options:
|
||||
|
||||
Option | Type | Description | Required
|
||||
:--- | :--- | :--- | :---
|
||||
`priority` | `Number` | The priority of the index template. | No
|
||||
`create` | `Boolean` | Whether this index template should replace an existing one. | No
|
|
@ -0,0 +1,89 @@
|
|||
---
|
||||
layout: default
|
||||
title: Introduction to OpenSearch
|
||||
nav_order: 1
|
||||
---
|
||||
|
||||
# Introduction to OpenSearch
|
||||
|
||||
OpenSearch is a distributed search and analytics engine based on [Apache Lucene](https://lucene.apache.org/). After adding your data to OpenSearch, you can perform full-text searches on it with all of the features you might expect: search by field, search multiple indices, boost fields, rank results by score, sort results by field, and aggregate results.
|
||||
|
||||
Unsurprisingly, people often use OpenSearch as the backend for a search application---think [Wikipedia](https://en.wikipedia.org/wiki/Wikipedia:FAQ/Technical#What_software_is_used_to_run_Wikipedia?) or an online store. It offers excellent performance and can scale up and down as the needs of the application grow or shrink.
|
||||
|
||||
An equally popular, but less obvious use case is log analytics, in which you take the logs from an application, feed them into OpenSearch, and use the rich search and visualization functionality to identify issues. For example, a malfunctioning web server might throw a 500 error 0.5% of the time, which can be hard to notice unless you have a real-time graph of all HTTP status codes that the server has thrown in the past four hours. You can use [OpenSearch Dashboards](../opensearch-dashboards/) to build these sorts of visualizations from data in OpenSearch.
|
||||
|
||||
|
||||
## Clusters and nodes
|
||||
|
||||
Its distributed design means that you interact with OpenSearch *clusters*. Each cluster is a collection of one or more *nodes*, servers that store your data and process search requests.
|
||||
|
||||
You can run OpenSearch locally on a laptop---its system requirements are minimal---but you can also scale a single cluster to hundreds of powerful machines in a data center.
|
||||
|
||||
In a single node cluster, such as a laptop, one machine has to do everything: manage the state of the cluster, index and search data, and perform any preprocessing of data prior to indexing it. As a cluster grows, however, you can subdivide responsibilities. Nodes with fast disks and plenty of RAM might be great at indexing and searching data, whereas a node with plenty of CPU power and a tiny disk could manage cluster state. For more information on setting node types, see [Cluster formation](cluster/).
|
||||
|
||||
|
||||
## Indices and documents
|
||||
|
||||
OpenSearch organizes data into *indices*. Each index is a collection of JSON *documents*. If you have a set of raw encyclopedia articles or log lines that you want to add to OpenSearch, you must first convert them to [JSON](https://www.json.org/). A simple JSON document for a movie might look like this:
|
||||
|
||||
```json
|
||||
{
|
||||
"title": "The Wind Rises",
|
||||
"release_date": "2013-07-20"
|
||||
}
|
||||
```
|
||||
|
||||
When you add the document to an index, OpenSearch adds some metadata, such as the unique document *ID*:
|
||||
|
||||
```json
|
||||
{
|
||||
"_index": "<index-name>",
|
||||
"_type": "_doc",
|
||||
"_id": "<document-id>",
|
||||
"_version": 1,
|
||||
"_source": {
|
||||
"title": "The Wind Rises",
|
||||
"release_date": "2013-07-20"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Indices also contain mappings and settings:
|
||||
|
||||
- A *mapping* is the collection of *fields* that documents in the index have. In this case, those fields are `title` and `release_date`.
|
||||
- Settings include data like the index name, creation date, and number of shards.
|
||||
|
||||
## Primary and replica shards
|
||||
|
||||
OpenSearch splits indices into *shards* for even distribution across nodes in a cluster. For example, a 400 GB index might be too large for any single node in your cluster to handle, but split into ten shards, each one 40 GB, OpenSearch can distribute the shards across ten nodes and work with each shard individually.
|
||||
|
||||
By default, OpenSearch creates a *replica* shard for each *primary* shard. If you split your index into ten shards, for example, OpenSearch also creates ten replica shards. These replica shards act as backups in the event of a node failure---OpenSearch distributes replica shards to different nodes than their corresponding primary shards---but they also improve the speed and rate at which the cluster can process search requests. You might specify more than one replica per index for a search-heavy workload.
|
||||
|
||||
Despite being a piece of an OpenSearch index, each shard is actually a full Lucene index---confusing, we know. This detail is important, though, because each instance of Lucene is a running process that consumes CPU and memory. More shards is not necessarily better. Splitting a 400 GB index into 1,000 shards, for example, would place needless strain on your cluster. A good rule of thumb is to keep shard size between 10--50 GB.
|
||||
|
||||
|
||||
## REST API
|
||||
|
||||
You interact with OpenSearch clusters using the REST API, which offers a lot of flexibility. You can use clients like [curl](https://curl.haxx.se/) or any programming language that can send HTTP requests. To add a JSON document to an OpenSearch index (i.e. index a document), you send an HTTP request:
|
||||
|
||||
```json
|
||||
PUT https://<host>:<port>/<index-name>/_doc/<document-id>
|
||||
{
|
||||
"title": "The Wind Rises",
|
||||
"release_date": "2013-07-20"
|
||||
}
|
||||
```
|
||||
|
||||
To run a search for the document:
|
||||
|
||||
```
|
||||
GET https://<host>:<port>/<index-name>/_search?q=wind
|
||||
```
|
||||
|
||||
To delete the document:
|
||||
|
||||
```
|
||||
DELETE https://<host>:<port>/<index-name>/_doc/<document-id>
|
||||
```
|
||||
|
||||
You can change most OpenSearch settings using the REST API, modify indices, check the health of the cluster, get statistics---almost everything.
|
|
@ -0,0 +1,178 @@
|
|||
---
|
||||
layout: default
|
||||
title: Docker security configuration
|
||||
parent: Install OpenSearch
|
||||
nav_order: 5
|
||||
---
|
||||
|
||||
# Docker security configuration
|
||||
|
||||
Before deploying to a production environment, you should replace the demo security certificates and configuration YAML files with your own. With the tarball, you have direct access to the file system, but the Docker image requires modifying the Docker storage volumes to include the replacement files.
|
||||
|
||||
Additionally, you can set the Docker environment variable `DISABLE_INSTALL_DEMO_CONFIG` to `true`. This change completely disables the demo installer.
|
||||
|
||||
## Sample Docker Compose file
|
||||
|
||||
```yml
|
||||
version: '3'
|
||||
services:
|
||||
opensearch-node1:
|
||||
image: opensearchproject/opensearch:{{site.opensearch_version}}
|
||||
container_name: opensearch-node1
|
||||
environment:
|
||||
- cluster.name=opensearch-cluster
|
||||
- node.name=opensearch-node1
|
||||
- discovery.seed_hosts=opensearch-node1,opensearch-node2
|
||||
- cluster.initial_master_nodes=opensearch-node1,opensearch-node2
|
||||
- bootstrap.memory_lock=true # along with the memlock settings below, disables swapping
|
||||
- "OPENSEARCH_JAVA_OPTS=-Xms512m -Xmx512m" # minimum and maximum Java heap size, recommend setting both to 50% of system RAM
|
||||
- network.host=0.0.0.0 # required if not using the demo security configuration
|
||||
ulimits:
|
||||
memlock:
|
||||
soft: -1
|
||||
hard: -1
|
||||
nofile:
|
||||
soft: 65536 # maximum number of open files for the OpenSearch user, set to at least 65536 on modern systems
|
||||
hard: 65536
|
||||
volumes:
|
||||
- opensearch-data1:/usr/share/opensearch/data
|
||||
- ./root-ca.pem:/usr/share/opensearch/config/root-ca.pem
|
||||
- ./node.pem:/usr/share/opensearch/config/node.pem
|
||||
- ./node-key.pem:/usr/share/opensearch/config/node-key.pem
|
||||
- ./admin.pem:/usr/share/opensearch/config/admin.pem
|
||||
- ./admin-key.pem:/usr/share/opensearch/config/admin-key.pem
|
||||
- ./custom-opensearch.yml:/usr/share/opensearch/config/opensearch.yml
|
||||
- ./internal_users.yml:/usr/share/opensearch/plugins/opensearch-security/securityconfig/internal_users.yml
|
||||
- ./roles_mapping.yml:/usr/share/opensearch/plugins/opensearch-security/securityconfig/roles_mapping.yml
|
||||
- ./tenants.yml:/usr/share/opensearch/plugins/opensearch-security/securityconfig/tenants.yml
|
||||
- ./roles.yml:/usr/share/opensearch/plugins/opensearch-security/securityconfig/roles.yml
|
||||
- ./action_groups.yml:/usr/share/opensearch/plugins/opensearch-security/securityconfig/action_groups.yml
|
||||
ports:
|
||||
- 9200:9200
|
||||
- 9600:9600 # required for Performance Analyzer
|
||||
networks:
|
||||
- opensearch-net
|
||||
opensearch-node2:
|
||||
image: opensearchproject/opensearch:{{site.opensearch_version}}
|
||||
container_name: opensearch-node2
|
||||
environment:
|
||||
- cluster.name=opensearch-cluster
|
||||
- node.name=opensearch-node2
|
||||
- discovery.seed_hosts=opensearch-node1,opensearch-node2
|
||||
- cluster.initial_master_nodes=opensearch-node1,opensearch-node2
|
||||
- bootstrap.memory_lock=true
|
||||
- "OPENSEARCH_JAVA_OPTS=-Xms512m -Xmx512m"
|
||||
- network.host=0.0.0.0
|
||||
ulimits:
|
||||
memlock:
|
||||
soft: -1
|
||||
hard: -1
|
||||
nofile:
|
||||
soft: 65536
|
||||
hard: 65536
|
||||
volumes:
|
||||
- opensearch-data2:/usr/share/opensearch/data
|
||||
- ./root-ca.pem:/usr/share/opensearch/config/root-ca.pem
|
||||
- ./node.pem:/usr/share/opensearch/config/node.pem
|
||||
- ./node-key.pem:/usr/share/opensearch/config/node-key.pem
|
||||
- ./admin.pem:/usr/share/opensearch/config/admin.pem
|
||||
- ./admin-key.pem:/usr/share/opensearch/config/admin-key.pem
|
||||
- ./custom-opensearch.yml:/usr/share/opensearch/config/opensearch.yml
|
||||
- ./internal_users.yml:/usr/share/opensearch/plugins/opensearch-security/securityconfig/internal_users.yml
|
||||
- ./roles_mapping.yml:/usr/share/opensearch/plugins/opensearch-security/securityconfig/roles_mapping.yml
|
||||
- ./tenants.yml:/usr/share/opensearch/plugins/opensearch-security/securityconfig/tenants.yml
|
||||
- ./roles.yml:/usr/share/opensearch/plugins/opensearch-security/securityconfig/roles.yml
|
||||
- ./action_groups.yml:/usr/share/opensearch/plugins/opensearch-security/securityconfig/action_groups.yml
|
||||
networks:
|
||||
- opensearch-net
|
||||
opensearch-dashboards:
|
||||
image: opensearchproject/opensearch-dashboards:{{site.opensearch_version}}
|
||||
container_name: opensearch-dashboards
|
||||
ports:
|
||||
- 5601:5601
|
||||
expose:
|
||||
- "5601"
|
||||
environment:
|
||||
OPENSEARCH_URL: https://opensearch-node1:9200
|
||||
OPENSEARCH_HOSTS: https://opensearch-node1:9200
|
||||
volumes:
|
||||
- ./custom-opensearch_dashboards.yml:/usr/share/opensearch-dashboards/config/opensearch_dashboards.yml
|
||||
networks:
|
||||
- opensearch-net
|
||||
|
||||
volumes:
|
||||
opensearch-data1:
|
||||
opensearch-data2:
|
||||
|
||||
networks:
|
||||
opensearch-net:
|
||||
```
|
||||
|
||||
Then make your changes to `opensearch.yml`. For a full list of settings, see [Security](../../../security/configuration/). This example adds (extremely) verbose audit logging:
|
||||
|
||||
```yml
|
||||
opensearch_security.ssl.transport.pemcert_filepath: node.pem
|
||||
opensearch_security.ssl.transport.pemkey_filepath: node-key.pem
|
||||
opensearch_security.ssl.transport.pemtrustedcas_filepath: root-ca.pem
|
||||
opensearch_security.ssl.transport.enforce_hostname_verification: false
|
||||
opensearch_security.ssl.http.enabled: true
|
||||
opensearch_security.ssl.http.pemcert_filepath: node.pem
|
||||
opensearch_security.ssl.http.pemkey_filepath: node-key.pem
|
||||
opensearch_security.ssl.http.pemtrustedcas_filepath: root-ca.pem
|
||||
opensearch_security.allow_default_init_securityindex: true
|
||||
opensearch_security.authcz.admin_dn:
|
||||
- CN=A,OU=UNIT,O=ORG,L=TORONTO,ST=ONTARIO,C=CA
|
||||
opensearch_security.nodes_dn:
|
||||
- 'CN=N,OU=UNIT,O=ORG,L=TORONTO,ST=ONTARIO,C=CA'
|
||||
opensearch_security.audit.type: internal_opensearch
|
||||
opensearch_security.enable_snapshot_restore_privilege: true
|
||||
opensearch_security.check_snapshot_restore_write_privileges: true
|
||||
opensearch_security.restapi.roles_enabled: ["all_access", "security_rest_api_access"]
|
||||
cluster.routing.allocation.disk.threshold_enabled: false
|
||||
opensearch_security.audit.config.disabled_rest_categories: NONE
|
||||
opensearch_security.audit.config.disabled_transport_categories: NONE
|
||||
```
|
||||
|
||||
Use this same override process to specify new [authentication settings](../../../security/configuration/configuration/) in `/usr/share/opensearch/plugins/opensearch-security/securityconfig/config.yml`, as well as new default [internal users, roles, mappings, action groups, and tenants](../../../security/configuration/yaml/).
|
||||
|
||||
To start the cluster, run `docker-compose up`.
|
||||
|
||||
If you encounter any `File /usr/share/opensearch/config/opensearch.yml has insecure file permissions (should be 0600)` messages, you can use `chmod` to set file permissions before running `docker-compose up`. Docker Compose passes files to the container as-is.
|
||||
{: .note }
|
||||
|
||||
Finally, you can reach OpenSearch Dashboards at http://localhost:5601, sign in, and use the **Security** panel to perform other management tasks.
|
||||
|
||||
## Using certificates with Docker
|
||||
|
||||
To use your own certificates in your configuration, add all of the necessary certificates to the volumes section of the Docker Compose file:
|
||||
|
||||
```yml
|
||||
volumes:
|
||||
- ./root-ca.pem:/full/path/to/certificate.pem
|
||||
- ./admin.pem:/full/path/to/certificate.pem
|
||||
- ./admin-key.pem:/full/path/to/certificate.pem
|
||||
#Add other certificates
|
||||
```
|
||||
|
||||
After replacing the demo certificates with your own, you must also include a custom `opensearch.yml` in your setup, which you need to specify in the volumes section.
|
||||
|
||||
```yml
|
||||
volumes:
|
||||
#Add certificates here
|
||||
- ./custom-opensearch.yml:/full/path/to/custom-opensearch.yml
|
||||
```
|
||||
|
||||
Remember that the certificates you specify in your Docker Compose file must be the same as the certificates listed in your custom `opensearch.yml` file. At a minimum, you should replace the root, admin, and node certificates with your own. For more information about adding and using certificates, see [Configure TLS certificates](../security/configuration/tls.md).
|
||||
|
||||
```yml
|
||||
opensearch_security.ssl.transport.pemcert_filepath: new-node-cert.pem
|
||||
opensearch_security.ssl.transport.pemkey_filepath: new-node-cert-key.pem
|
||||
opensearch_security.ssl.transport.pemtrustedcas_filepath: new-root-ca.pem
|
||||
opensearch_security.ssl.http.pemcert_filepath: new-node-cert.pem
|
||||
opensearch_security.ssl.http.pemkey_filepath: new-node-cert-key.pem
|
||||
opensearch_security.ssl.http.pemtrustedcas_filepath: new-root-ca.pem
|
||||
opensearch_security.authcz.admin_dn:
|
||||
- CN=admin,OU=SSL,O=Test,L=Test,C=DE
|
||||
```
|
||||
|
||||
To start the cluster, run `docker-compose up` as usual.
|
|
@ -0,0 +1,324 @@
|
|||
---
|
||||
layout: default
|
||||
title: Docker
|
||||
parent: Install OpenSearch
|
||||
nav_order: 1
|
||||
---
|
||||
|
||||
# Docker image
|
||||
|
||||
You can pull the OpenSearch Docker image just like any other image:
|
||||
|
||||
```bash
|
||||
docker pull opensearchproject/opensearch:{{site.opensearch_version}}
|
||||
docker pull opensearchproject/opensearch-dashboards:{{site.opensearch_version}}
|
||||
```
|
||||
|
||||
To check available versions, see [Docker Hub](https://hub.docker.com/u/opensearchproject).
|
||||
|
||||
OpenSearch images use `centos:7` as the base image. If you run Docker locally, we recommend allowing Docker to use at least 4 GB of RAM in **Preferences** > **Resources**.
|
||||
|
||||
|
||||
---
|
||||
|
||||
#### Table of contents
|
||||
1. TOC
|
||||
{:toc}
|
||||
|
||||
|
||||
---
|
||||
|
||||
## Run the image
|
||||
|
||||
To run the image for local development:
|
||||
|
||||
```bash
|
||||
docker run -p 9200:9200 -p 9600:9600 -e "discovery.type=single-node" opensearchproject/opensearch:{{site.opensearch_version}}
|
||||
```
|
||||
|
||||
Then send requests to the server to verify that OpenSearch is up and running:
|
||||
|
||||
```bash
|
||||
curl -XGET https://localhost:9200 -u 'admin:admin' --insecure
|
||||
curl -XGET https://localhost:9200/_cat/nodes?v -u 'admin:admin' --insecure
|
||||
curl -XGET https://localhost:9200/_cat/plugins?v -u 'admin:admin' --insecure
|
||||
```
|
||||
|
||||
To find the container ID:
|
||||
|
||||
```bash
|
||||
docker ps
|
||||
```
|
||||
|
||||
Then you can stop the container using:
|
||||
|
||||
```bash
|
||||
docker stop <container-id>
|
||||
```
|
||||
|
||||
|
||||
## Start a cluster
|
||||
|
||||
To deploy multiple nodes and simulate a more realistic deployment, create a [docker-compose.yml](https://docs.docker.com/compose/compose-file/) file appropriate for your environment and run:
|
||||
|
||||
```bash
|
||||
docker-compose up
|
||||
```
|
||||
|
||||
To stop the cluster, run:
|
||||
|
||||
```bash
|
||||
docker-compose down
|
||||
```
|
||||
|
||||
To stop the cluster and delete all data volumes, run:
|
||||
|
||||
```bash
|
||||
docker-compose down -v
|
||||
```
|
||||
|
||||
|
||||
#### Sample Docker Compose file
|
||||
|
||||
This sample file starts two data nodes and a container for OpenSearch Dashboards.
|
||||
|
||||
```yml
|
||||
version: '3'
|
||||
services:
|
||||
opensearch-node1:
|
||||
image: opensearchproject/opensearch:{{site.opensearch_version}}
|
||||
container_name: opensearch-node1
|
||||
environment:
|
||||
- cluster.name=opensearch-cluster
|
||||
- node.name=opensearch-node1
|
||||
- discovery.seed_hosts=opensearch-node1,opensearch-node2
|
||||
- cluster.initial_master_nodes=opensearch-node1,opensearch-node2
|
||||
- bootstrap.memory_lock=true # along with the memlock settings below, disables swapping
|
||||
- "OPENSEARCH_JAVA_OPTS=-Xms512m -Xmx512m" # minimum and maximum Java heap size, recommend setting both to 50% of system RAM
|
||||
ulimits:
|
||||
memlock:
|
||||
soft: -1
|
||||
hard: -1
|
||||
nofile:
|
||||
soft: 65536 # maximum number of open files for the OpenSearch user, set to at least 65536 on modern systems
|
||||
hard: 65536
|
||||
volumes:
|
||||
- opensearch-data1:/usr/share/opensearch/data
|
||||
ports:
|
||||
- 9200:9200
|
||||
- 9600:9600 # required for Performance Analyzer
|
||||
networks:
|
||||
- opensearch-net
|
||||
opensearch-node2:
|
||||
image: opensearchproject/opensearch:{{site.opensearch_version}}
|
||||
container_name: opensearch-node2
|
||||
environment:
|
||||
- cluster.name=opensearch-cluster
|
||||
- node.name=opensearch-node2
|
||||
- discovery.seed_hosts=opensearch-node1,opensearch-node2
|
||||
- cluster.initial_master_nodes=opensearch-node1,opensearch-node2
|
||||
- bootstrap.memory_lock=true
|
||||
- "OPENSEARCH_JAVA_OPTS=-Xms512m -Xmx512m"
|
||||
ulimits:
|
||||
memlock:
|
||||
soft: -1
|
||||
hard: -1
|
||||
nofile:
|
||||
soft: 65536
|
||||
hard: 65536
|
||||
volumes:
|
||||
- opensearch-data2:/usr/share/opensearch/data
|
||||
networks:
|
||||
- opensearch-net
|
||||
opensearch-dashboards:
|
||||
image: opensearchproject/opensearch-dashboards:{{site.opensearch_version}}
|
||||
container_name: opensearch-dashboards
|
||||
ports:
|
||||
- 5601:5601
|
||||
expose:
|
||||
- "5601"
|
||||
environment:
|
||||
OPENSEARCH_HOSTS: https://opensearch-node1:9200
|
||||
networks:
|
||||
- opensearch-net
|
||||
|
||||
volumes:
|
||||
opensearch-data1:
|
||||
opensearch-data2:
|
||||
|
||||
networks:
|
||||
opensearch-net:
|
||||
```
|
||||
|
||||
If you override `opensearch_dashboards.yml` settings using environment variables, as seen above, use all uppercase letters and underscores in place of periods (e.g. for `opensearch.url`, specify `OPENSEARCH_URL`).
|
||||
{: .note}
|
||||
|
||||
|
||||
## Configure OpenSearch
|
||||
|
||||
You can pass a custom `opensearch.yml` file to the Docker container using the [`-v` flag](https://docs.docker.com/engine/reference/commandline/run/#mount-volume--v---read-only) for `docker run`:
|
||||
|
||||
```bash
|
||||
docker run \
|
||||
-p 9200:9200 -p 9600:9600 \
|
||||
-e "discovery.type=single-node" \
|
||||
-v /<full-path-to>/custom-opensearch.yml:/usr/share/opensearch/config/opensearch.yml \
|
||||
opensearchproject/opensearch:{{site.opensearch_version}}
|
||||
```
|
||||
|
||||
You can perform the same operation in `docker-compose.yml` using a relative path:
|
||||
|
||||
```yml
|
||||
services:
|
||||
opensearch-node1:
|
||||
volumes:
|
||||
- opensearch-data1:/usr/share/opensearch/data
|
||||
- ./custom-opensearch.yml:/usr/share/opensearch/config/opensearch.yml
|
||||
opensearch-node2:
|
||||
volumes:
|
||||
- opensearch-data2:/usr/share/opensearch/data
|
||||
- ./custom-opensearch.yml:/usr/share/opensearch/config/opensearch.yml
|
||||
opensearch-dashboards:
|
||||
volumes:
|
||||
- ./custom-opensearch_dashboards.yml:/usr/share/opensearch-dashboards/config/opensearch_dashboards.yml
|
||||
```
|
||||
|
||||
You can also configure `docker-compose.yml` and `opensearch.yml` [to take your own certificates](../docker-security/) for use with the [Security](../../security/configuration/) plugin.
|
||||
|
||||
|
||||
### (Optional) Set up Performance Analyzer
|
||||
|
||||
1. Enable the Performance Analyzer plugin:
|
||||
|
||||
```bash
|
||||
curl -XPOST localhost:9200/_opensearch/_performanceanalyzer/cluster/config -H 'Content-Type: application/json' -d '{"enabled": true}'
|
||||
```
|
||||
|
||||
If you receive the `curl: (52) Empty reply from server` error, you are likely protecting your cluster with the security plugin and you need to provide credentials. Modify the following command to use your username and password:
|
||||
|
||||
```bash
|
||||
curl -XPOST https://localhost:9200/_opensearch/_performanceanalyzer/cluster/config -H 'Content-Type: application/json' -d '{"enabled": true}' -u 'admin:admin' -k
|
||||
```
|
||||
|
||||
1. Enable the Root Cause Analyzer (RCA) framework:
|
||||
|
||||
```bash
|
||||
curl -XPOST localhost:9200/_opensearch/_performanceanalyzer/rca/cluster/config -H 'Content-Type: application/json' -d '{"enabled": true}'
|
||||
```
|
||||
|
||||
Similar to step 1, if you run into `curl: (52) Empty reply from server`, run the command below to enable RCA:
|
||||
|
||||
```bash
|
||||
curl -XPOST https://localhost:9200/_opensearch/_performanceanalyzer/rca/cluster/config -H 'Content-Type: application/json' -d '{"enabled": true}' -u 'admin:admin' -k
|
||||
```
|
||||
|
||||
1. By default, Performance Analyzer's endpoints are not accessible from outside the Docker container.
|
||||
|
||||
To edit this behavior, open a shell session in the container and modify the configuration:
|
||||
|
||||
```bash
|
||||
docker ps # Look up the container id
|
||||
docker exec -it <container-id> /bin/bash
|
||||
# Inside container
|
||||
cd plugins/opensearch-performance-analyzer/pa_config/
|
||||
vi performance-analyzer.properties
|
||||
```
|
||||
|
||||
Uncomment the line `#webservice-bind-host` and set it to `0.0.0.0`:
|
||||
|
||||
```
|
||||
# ======================== OpenSearch performance analyzer plugin config =========================
|
||||
|
||||
# NOTE: this is an example for Linux. Please modify the config accordingly if you are using it under other OS.
|
||||
|
||||
# WebService bind host; default to all interfaces
|
||||
webservice-bind-host = 0.0.0.0
|
||||
|
||||
# Metrics data location
|
||||
metrics-location = /dev/shm/performanceanalyzer/
|
||||
|
||||
# Metrics deletion interval (minutes) for metrics data.
|
||||
# Interval should be between 1 to 60.
|
||||
metrics-deletion-interval = 1
|
||||
|
||||
# If set to true, the system cleans up the files behind it. So at any point, we should expect only 2
|
||||
# metrics-db-file-prefix-path files. If set to false, no files are cleaned up. This can be useful, if you are archiving
|
||||
# the files and wouldn't like for them to be cleaned up.
|
||||
cleanup-metrics-db-files = true
|
||||
|
||||
# WebService exposed by App's port
|
||||
webservice-listener-port = 9600
|
||||
|
||||
# Metric DB File Prefix Path location
|
||||
metrics-db-file-prefix-path = /tmp/metricsdb_
|
||||
|
||||
https-enabled = false
|
||||
|
||||
#Setup the correct path for certificates
|
||||
certificate-file-path = specify_path
|
||||
|
||||
private-key-file-path = specify_path
|
||||
|
||||
# Plugin Stats Metadata file name, expected to be in the same location
|
||||
plugin-stats-metadata = plugin-stats-metadata
|
||||
|
||||
# Agent Stats Metadata file name, expected to be in the same location
|
||||
agent-stats-metadata = agent-stats-metadata
|
||||
```
|
||||
|
||||
1. Then restart the Performance Analyzer agent:
|
||||
|
||||
```bash
|
||||
kill $(ps aux | grep -i 'PerformanceAnalyzerApp' | grep -v grep | awk '{print $2}')
|
||||
```
|
||||
|
||||
|
||||
## Bash access to containers
|
||||
|
||||
To create an interactive Bash session in a container, run `docker ps` to find the container ID. Then run:
|
||||
|
||||
```bash
|
||||
docker exec -it <container-id> /bin/bash
|
||||
```
|
||||
|
||||
|
||||
## Customize the Docker image
|
||||
|
||||
To run the image with a custom plugin, first create a [`Dockerfile`](https://docs.docker.com/engine/reference/builder/):
|
||||
|
||||
```
|
||||
FROM opensearchproject/opensearch:{{site.opensearch_version}}
|
||||
RUN /usr/share/opensearch/bin/opensearch-plugin install --batch <plugin-name-or-url>
|
||||
```
|
||||
|
||||
Then run the following commands:
|
||||
|
||||
```bash
|
||||
docker build --tag=opensearch-custom-plugin .
|
||||
docker run -p 9200:9200 -p 9600:9600 -v /usr/share/opensearch/data opensearch-custom-plugin
|
||||
```
|
||||
|
||||
You can also use a `Dockerfile` to pass your own certificates for use with the [Security](../../../security/) plugin, similar to the `-v` argument in [Configure OpenSearch](#configure-opensearch):
|
||||
|
||||
```
|
||||
FROM opensearchproject/opensearch:{{site.opensearch_version}}
|
||||
COPY --chown=opensearch:opensearch opensearch.yml /usr/share/opensearch/config/
|
||||
COPY --chown=opensearch:opensearch my-key-file.pem /usr/share/opensearch/config/
|
||||
COPY --chown=opensearch:opensearch my-certificate-chain.pem /usr/share/opensearch/config/
|
||||
COPY --chown=opensearch:opensearch my-root-cas.pem /usr/share/opensearch/config/
|
||||
```
|
||||
|
||||
Alternately, you might want to remove a plugin. This `Dockerfile` removes the security plugin:
|
||||
|
||||
```
|
||||
FROM opensearchproject/opensearch:{{site.opensearch_version}}
|
||||
RUN /usr/share/opensearch/bin/opensearch-plugin remove opensearch-security
|
||||
COPY --chown=opensearch:opensearch opensearch.yml /usr/share/opensearch/config/
|
||||
```
|
||||
|
||||
In this case, `opensearch.yml` is a "vanilla" version of the file with no security plugin entries. It might look like this:
|
||||
|
||||
```yml
|
||||
cluster.name: "docker-cluster"
|
||||
network.host: 0.0.0.0
|
||||
```
|
|
@ -0,0 +1,40 @@
|
|||
---
|
||||
layout: default
|
||||
title: Important settings
|
||||
parent: Install OpenSearch
|
||||
nav_order: 70
|
||||
---
|
||||
|
||||
# Important settings
|
||||
|
||||
For production workloads, make sure the [Linux setting](https://www.kernel.org/doc/Documentation/sysctl/vm.txt) `vm.max_map_count` is set to at least 262144. On the OpenSearch Docker image, this setting is the default. To check, start a Bash session in the container and run:
|
||||
|
||||
```bash
|
||||
cat /proc/sys/vm/max_map_count
|
||||
```
|
||||
|
||||
To increase this value, you have to modify the Docker image. For other install types, add this setting to the host machine's `/etc/sysctl.conf` file with the following line:
|
||||
|
||||
```
|
||||
vm.max_map_count=262144
|
||||
```
|
||||
|
||||
Then run `sudo sysctl -p` to reload.
|
||||
|
||||
The [sample docker-compose.yml](../docker/#sample-docker-compose-file) file also contains several key settings:
|
||||
|
||||
- `bootstrap.memory_lock=true`
|
||||
|
||||
Disables swapping (along with `memlock`). Swapping can dramatically decrease performance and stability, so you should ensure it is disabled on production clusters.
|
||||
|
||||
- `OPENSEARCH_JAVA_OPTS=-Xms512m -Xmx512m`
|
||||
|
||||
Sets the size of the Java heap (we recommend half of system RAM).
|
||||
|
||||
- `nofile 65536`
|
||||
|
||||
Sets a limit of 65536 open files for the OpenSearch user.
|
||||
|
||||
- `port 9600`
|
||||
|
||||
Allows you to access Performance Analyzer on port 9600.
|
|
@ -0,0 +1,11 @@
|
|||
---
|
||||
layout: default
|
||||
title: Install OpenSearch
|
||||
nav_order: 2
|
||||
redirect_from: /docs/install/
|
||||
has_children: true
|
||||
---
|
||||
|
||||
# Install and configure OpenSearch
|
||||
|
||||
OpenSearch has two installation options at this time: Docker images and tarballs.
|
|
@ -0,0 +1,263 @@
|
|||
---
|
||||
layout: default
|
||||
title: OpenSearch plugins
|
||||
parent: Install OpenSearch
|
||||
nav_order: 90
|
||||
---
|
||||
|
||||
# Standalone OpenSearch plugin installation
|
||||
|
||||
If you don't want to use the all-in-one OpenSearch installation options, you can install the individual plugins on a compatible OpenSearch cluster, just like any other plugin.
|
||||
|
||||
|
||||
---
|
||||
|
||||
#### Table of contents
|
||||
1. TOC
|
||||
{:toc}
|
||||
|
||||
|
||||
---
|
||||
|
||||
## Plugin compatibility
|
||||
|
||||
<table>
|
||||
<thead style="text-align: left">
|
||||
<tr>
|
||||
<th>OpenSearch version</th>
|
||||
<th>Plugin versions</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr>
|
||||
<td>1.0.0-beta1</td>
|
||||
<td>
|
||||
<pre>opensearch-alerting 1.0.0.0-beta1
|
||||
opensearch-anomaly-detection 1.0.0.0-beta1
|
||||
opensearch-asynchronous-search 1.0.0.0-beta1
|
||||
opensearch-index-management 1.0.0.0-beta1
|
||||
opensearch-job-scheduler 1.0.0.0-beta1
|
||||
opensearch-knn 1.0.0.0-beta1
|
||||
opensearch-performance-analyzer 1.0.0.0-beta1
|
||||
opensearch-reports-scheduler 1.0.0.0-beta1
|
||||
opensearch-security 1.0.0.0-beta1
|
||||
opensearch-sql 1.0.0.0-beta1
|
||||
</pre>
|
||||
</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
|
||||
To install plugins manually, you must have the exact version of OpenSearch installed, down to the minor version.
|
||||
|
||||
{% comment %}
|
||||
|
||||
To get a list of available OpenSearch versions on CentOS 7 and Amazon Linux 2, run the following command:
|
||||
|
||||
```bash
|
||||
sudo yum list opensearch-oss --showduplicates
|
||||
```
|
||||
|
||||
Then you can specify the version that you need:
|
||||
|
||||
```bash
|
||||
sudo yum install opensearch-oss-6.7.1
|
||||
```
|
||||
|
||||
{% endcomment %}
|
||||
|
||||
|
||||
## Install plugins
|
||||
|
||||
Navigate to the OpenSearch home directory (most likely, it is `/usr/share/opensearch`), and run the install command for each plugin.
|
||||
|
||||
|
||||
### Security
|
||||
|
||||
```bash
|
||||
sudo bin/opensearch-plugin install https://d3g5vo6xdbdb9a.cloudfront.net/downloads/opensearch-plugins/opensearch-security/opensearch-security-{{site.opensearch_major_minor_version}}.1.0.zip
|
||||
```
|
||||
|
||||
After installing the security plugin, you can run `sudo sh /usr/share/opensearch/plugins/opensearch-security/tools/install_demo_configuration.sh` to quickly get started with demo certificates. Otherwise, you must configure it manually and run [securityadmin.sh](../../../security/configuration/security-admin/).
|
||||
|
||||
The security plugin has a corresponding [OpenSearch Dashboards plugin](../../../opensearch-dashboards/install/plugins) that you probably want to install as well.
|
||||
|
||||
|
||||
### Job scheduler
|
||||
|
||||
```bash
|
||||
sudo bin/opensearch-plugin install https://d3g5vo6xdbdb9a.cloudfront.net/downloads/opensearch-plugins/opensearch-job-scheduler/opensearch-job-scheduler-{{site.opensearch_major_minor_version}}.0.0.zip
|
||||
```
|
||||
|
||||
|
||||
### Alerting
|
||||
|
||||
```bash
|
||||
sudo bin/opensearch-plugin install https://d3g5vo6xdbdb9a.cloudfront.net/downloads/opensearch-plugins/opensearch-alerting/opensearch-alerting-{{site.opensearch_major_minor_version}}.1.0.zip
|
||||
```
|
||||
|
||||
To install Alerting, you must first install the Job Scheduler plugin. Alerting has a corresponding [OpenSearch Dashboards plugin](../../../opensearch-dashboards/install/plugins/) that you probably want to install as well.
|
||||
|
||||
|
||||
### SQL
|
||||
|
||||
```bash
|
||||
sudo bin/opensearch-plugin install https://d3g5vo6xdbdb9a.cloudfront.net/downloads/opensearch-plugins/opensearch-sql/opensearch-sql-{{site.opensearch_major_minor_version}}.2.0.zip
|
||||
```
|
||||
|
||||
|
||||
### Reports scheduler
|
||||
|
||||
```bash
|
||||
sudo bin/opensearch-plugin install https://d3g5vo6xdbdb9a.cloudfront.net/downloads/opensearch-plugins/opensearch-reports-scheduler/opensearch-reports-scheduler-{{site.opensearch_major_minor_version}}.0.0.zip
|
||||
```
|
||||
|
||||
|
||||
### Index State Management
|
||||
|
||||
```bash
|
||||
sudo bin/opensearch-plugin install https://d3g5vo6xdbdb9a.cloudfront.net/downloads/opensearch-plugins/opensearch-index-management/opensearch-index-management-{{site.opensearch_major_minor_version}}.2.0.zip
|
||||
```
|
||||
|
||||
To install Index State Management, you must first install the Job Scheduler plugin. ISM has a corresponding [OpenSearch Dashboards plugin](../../../opensearch-dashboards/install/plugins/) that you probably want to install as well.
|
||||
|
||||
|
||||
### k-NN
|
||||
|
||||
k-NN is only available as part of the all-in-one installs: Docker, RPM, and Debian.
|
||||
|
||||
|
||||
### Anomaly detection
|
||||
|
||||
```bash
|
||||
sudo bin/opensearch-plugin install https://d3g5vo6xdbdb9a.cloudfront.net/downloads/opensearch-plugins/opensearch-anomaly-detection/opensearch-anomaly-detection-{{site.opensearch_major_minor_version}}.0.0.zip
|
||||
```
|
||||
|
||||
|
||||
### Asynchronous search
|
||||
|
||||
```bash
|
||||
sudo bin/opensearch-plugin install https://d3g5vo6xdbdb9a.cloudfront.net/downloads/opensearch-plugins/opensearch-asynchronous-search/opensearch-asynchronous-search-{{site.opensearch_major_minor_version}}.0.1.zip
|
||||
```
|
||||
|
||||
|
||||
### Performance Analyzer
|
||||
|
||||
```bash
|
||||
sudo bin/opensearch-plugin install https://d3g5vo6xdbdb9a.cloudfront.net/downloads/opensearch-plugins/performance-analyzer/opensearch-performance-analyzer-{{site.opensearch_major_minor_version}}.0.0.zip
|
||||
```
|
||||
|
||||
Performance Analyzer requires some manual configuration after installing the plugin:
|
||||
|
||||
1. Create `/usr/lib/systemd/system/opensearch-performance-analyzer.service` based on [this file](https://github.com/opensearch-project/performance-analyzer/blob/master/packaging/opensearch-performance-analyzer.service).
|
||||
|
||||
1. Make the CLI executable:
|
||||
|
||||
```bash
|
||||
sudo chmod +x /usr/share/opensearch/bin/performance-analyzer-agent-cli
|
||||
```
|
||||
|
||||
1. Run the appropriate `postinst` script for your Linux distribution:
|
||||
|
||||
```bash
|
||||
# Debian-based distros
|
||||
sudo sh /usr/share/opensearch/plugins/opensearch-performance-analyzer/install/deb/postinst.sh 1
|
||||
|
||||
# RPM distros
|
||||
sudo sh /usr/share/opensearch/plugins/opensearch-performance-analyzer/install/rpm/postinst.sh 1
|
||||
```
|
||||
|
||||
1. Make Performance Analyzer accessible outside of the host machine:
|
||||
|
||||
```bash
|
||||
cd /usr/share/opensearch # navigate to the OpenSearch home directory
|
||||
cd plugins/opensearch-performance-analyzer/pa_config/
|
||||
vi performance-analyzer.properties
|
||||
```
|
||||
|
||||
Uncomment the line `#webservice-bind-host` and set it to `0.0.0.0`:
|
||||
|
||||
```bash
|
||||
# ======================== OpenSearch performance analyzer plugin config =========================
|
||||
|
||||
# NOTE: this is an example for Linux. Please modify the config accordingly if you are using it under other OS.
|
||||
|
||||
# WebService bind host; default to all interfaces
|
||||
webservice-bind-host = 0.0.0.0
|
||||
|
||||
# Metrics data location
|
||||
metrics-location = /dev/shm/performanceanalyzer/
|
||||
|
||||
# Metrics deletion interval (minutes) for metrics data.
|
||||
# Interval should be between 1 to 60.
|
||||
metrics-deletion-interval = 1
|
||||
|
||||
# If set to true, the system cleans up the files behind it. So at any point, we should expect only 2
|
||||
# metrics-db-file-prefix-path files. If set to false, no files are cleaned up. This can be useful, if you are archiving
|
||||
# the files and wouldn't like for them to be cleaned up.
|
||||
cleanup-metrics-db-files = true
|
||||
|
||||
# WebService exposed by App's port
|
||||
webservice-listener-port = 9600
|
||||
|
||||
# Metric DB File Prefix Path location
|
||||
metrics-db-file-prefix-path = /tmp/metricsdb_
|
||||
|
||||
https-enabled = false
|
||||
|
||||
#Setup the correct path for certificates
|
||||
certificate-file-path = specify_path
|
||||
|
||||
private-key-file-path = specify_path
|
||||
|
||||
# Plugin Stats Metadata file name, expected to be in the same location
|
||||
plugin-stats-metadata = plugin-stats-metadata
|
||||
|
||||
# Agent Stats Metadata file name, expected to be in the same location
|
||||
agent-stats-metadata = agent-stats-metadata
|
||||
```
|
||||
|
||||
1. Start the OpenSearch service:
|
||||
|
||||
```bash
|
||||
sudo systemctl start opensearch.service
|
||||
```
|
||||
|
||||
1. Send a test request:
|
||||
|
||||
```bash
|
||||
curl -XGET "localhost:9600/_opensearch/_performanceanalyzer/metrics?metrics=Latency,CPU_Utilization&agg=avg,max&dim=ShardID&nodes=all"
|
||||
```
|
||||
|
||||
|
||||
## List installed plugins
|
||||
|
||||
To check your installed plugins:
|
||||
|
||||
```bash
|
||||
sudo bin/opensearch-plugin list
|
||||
```
|
||||
|
||||
|
||||
## Remove plugins
|
||||
|
||||
If you are removing Performance Analyzer, see below. Otherwise, you can remove the plugin with a single command:
|
||||
|
||||
```bash
|
||||
sudo bin/opensearch-plugin remove <plugin-name>
|
||||
```
|
||||
|
||||
Then restart OpenSearch on the node:
|
||||
|
||||
```bash
|
||||
sudo systemctl restart opensearch.service
|
||||
```
|
||||
|
||||
## Update plugins
|
||||
|
||||
OpenSearch doesn't update plugins. Instead, you have to remove and reinstall them:
|
||||
|
||||
```bash
|
||||
sudo bin/opensearch-plugin remove <plugin-name>
|
||||
sudo bin/opensearch-plugin install <plugin-name>
|
||||
```
|
|
@ -0,0 +1,147 @@
|
|||
---
|
||||
layout: default
|
||||
title: Tarball
|
||||
parent: Install OpenSearch
|
||||
nav_order: 50
|
||||
---
|
||||
|
||||
# Tarball
|
||||
|
||||
The tarball installation provides a self-contained directory with everything you need to run OpenSearch, including an integrated Java Development Kit (JDK). The tarball is a good option for testing and development.
|
||||
|
||||
The tarball supports most Linux distributions, including CentOS 7, Amazon Linux 2, and Ubuntu 18.04. If you have your own Java installation and set `JAVA_HOME` in the terminal, macOS works as well.
|
||||
|
||||
1. Download the tarball from the [OpenSearch downloads page](https://opensearch.org/downloads.html){:target='\_blank'}.
|
||||
|
||||
1. Extract the TAR file to a directory and change to that directory:
|
||||
|
||||
```bash
|
||||
# x64
|
||||
tar -zxf opensearch-{{site.opensearch_version}}-linux-x64.tar.gz
|
||||
cd opensearch-{{site.opensearch_version}}{% comment %}# ARM64
|
||||
tar -zxf opensearch-{{site.opensearch_version}}-linux-arm64.tar.gz
|
||||
cd opensearch-{{site.opensearch_version}}{% endcomment %}
|
||||
```
|
||||
|
||||
1. Run OpenSearch:
|
||||
|
||||
```bash
|
||||
./opensearch-tar-install.sh
|
||||
```
|
||||
|
||||
1. Open a second terminal session, and send requests to the server to verify that OpenSearch is up and running:
|
||||
|
||||
```bash
|
||||
curl -XGET https://localhost:9200 -u 'admin:admin' --insecure
|
||||
curl -XGET https://localhost:9200/_cat/plugins?v -u 'admin:admin' --insecure
|
||||
```
|
||||
|
||||
|
||||
## Configuration
|
||||
|
||||
You can modify `config/opensearch.yml` or specify environment variables as arguments using `-E`:
|
||||
|
||||
```bash
|
||||
./opensearch-tar-install.sh -Ecluster.name=opensearch-cluster -Enode.name=opensearch-node1 -Ehttp.host=0.0.0.0 -Ediscovery.type=single-node
|
||||
```
|
||||
|
||||
For other settings, see [Important settings](../important-settings/).
|
||||
|
||||
|
||||
### (Optional) Set up Performance Analyzer
|
||||
|
||||
In a tarball installation, Performance Analyzer collects data when it is enabled. But in order to read that data using the REST API on port 9600, you must first manually launch the associated reader agent process:
|
||||
|
||||
1. Make Performance Analyzer accessible outside of the host machine:
|
||||
|
||||
```bash
|
||||
cd /usr/share/opensearch # navigate to the OpenSearch home directory
|
||||
cd plugins/opensearch-performance-analyzer/pa_config/
|
||||
vi performance-analyzer.properties
|
||||
```
|
||||
|
||||
Uncomment the line `#webservice-bind-host` and set it to `0.0.0.0`:
|
||||
|
||||
```
|
||||
# ======================== OpenSearch performance analyzer plugin config =========================
|
||||
|
||||
# NOTE: this is an example for Linux. Please modify the config accordingly if you are using it under other OS.
|
||||
|
||||
# WebService bind host; default to all interfaces
|
||||
webservice-bind-host = 0.0.0.0
|
||||
|
||||
# Metrics data location
|
||||
metrics-location = /dev/shm/performanceanalyzer/
|
||||
|
||||
# Metrics deletion interval (minutes) for metrics data.
|
||||
# Interval should be between 1 to 60.
|
||||
metrics-deletion-interval = 1
|
||||
|
||||
# If set to true, the system cleans up the files behind it. So at any point, we should expect only 2
|
||||
# metrics-db-file-prefix-path files. If set to false, no files are cleaned up. This can be useful, if you are archiving
|
||||
# the files and wouldn't like for them to be cleaned up.
|
||||
cleanup-metrics-db-files = true
|
||||
|
||||
# WebService exposed by App's port
|
||||
webservice-listener-port = 9600
|
||||
|
||||
# Metric DB File Prefix Path location
|
||||
metrics-db-file-prefix-path = /tmp/metricsdb_
|
||||
|
||||
https-enabled = false
|
||||
|
||||
#Setup the correct path for certificates
|
||||
certificate-file-path = specify_path
|
||||
|
||||
private-key-file-path = specify_path
|
||||
|
||||
# Plugin Stats Metadata file name, expected to be in the same location
|
||||
plugin-stats-metadata = plugin-stats-metadata
|
||||
|
||||
# Agent Stats Metadata file name, expected to be in the same location
|
||||
agent-stats-metadata = agent-stats-metadata
|
||||
```
|
||||
|
||||
1. Make the CLI executable:
|
||||
|
||||
```bash
|
||||
sudo chmod +x ./bin/performance-analyzer-agent-cli
|
||||
```
|
||||
|
||||
1. Launch the agent CLI:
|
||||
|
||||
```bash
|
||||
ES_HOME="$PWD" ./bin/performance-analyzer-agent-cli
|
||||
```
|
||||
|
||||
1. In a separate window, enable the Performance Analyzer plugin:
|
||||
|
||||
```bash
|
||||
curl -XPOST localhost:9200/_opensearch/_performanceanalyzer/cluster/config -H 'Content-Type: application/json' -d '{"enabled": true}'
|
||||
```
|
||||
|
||||
If you receive the `curl: (52) Empty reply from server` error, you are likely protecting your cluster with the security plugin and you need to provide credentials. Modify the following command to use your username and password:
|
||||
|
||||
```bash
|
||||
curl -XPOST https://localhost:9200/_opensearch/_performanceanalyzer/cluster/config -H 'Content-Type: application/json' -d '{"enabled": true}' -u 'admin:admin' -k
|
||||
```
|
||||
|
||||
1. Finally, enable the Root Cause Analysis (RCA) framework:
|
||||
|
||||
```bash
|
||||
curl -XPOST localhost:9200/_opensearch/_performanceanalyzer/rca/cluster/config -H 'Content-Type: application/json' -d '{"enabled": true}'
|
||||
```
|
||||
|
||||
Similar to step 4, if you receive the `curl: (52) Empty reply from server` error, run the following command to enable RCA:
|
||||
|
||||
```bash
|
||||
curl -XPOST https://localhost:9200/_opensearch/_performanceanalyzer/rca/cluster/config -H 'Content-Type: application/json' -d '{"enabled": true}' -u 'admin:admin' -k
|
||||
```
|
||||
|
||||
{% comment %}
|
||||
|
||||
### (Optional) Removing Performance Analyzer
|
||||
|
||||
See [Clean up Performance Analyzer files](../plugins/#optional-clean-up-performance-analyzer-files).
|
||||
|
||||
{% endcomment %}
|
|
@ -0,0 +1,174 @@
|
|||
---
|
||||
layout: default
|
||||
title: Logs
|
||||
nav_order: 60
|
||||
---
|
||||
|
||||
# Logs
|
||||
|
||||
The OpenSearch logs include valuable information for monitoring cluster operations and troubleshooting issues. The location of the logs differs based on the installation type:
|
||||
|
||||
- On Docker, OpenSearch writes most logs to the console and stores the remainder in `opensearch/logs/`. The tarball installation also uses `opensearch/logs/`.
|
||||
- On the RPM and Debian installations, OpenSearch writes logs to `/var/log/opensearch/`.
|
||||
|
||||
Logs are available as `.log` (plain text) and `.json` files.
|
||||
|
||||
|
||||
## Application logs
|
||||
|
||||
For its application logs, OpenSearch uses [Apache Log4j 2](https://logging.apache.org/log4j/2.x/) and its built-in log levels (from least to most severe) of TRACE, DEBUG, INFO, WARN, ERROR, and FATAL. The default OpenSearch log level is INFO.
|
||||
|
||||
Rather than changing the default log level (`logger.level`), you change the log level for individual OpenSearch modules:
|
||||
|
||||
```json
|
||||
PUT /_cluster/settings
|
||||
{
|
||||
"persistent" : {
|
||||
"logger.org.opensearch.index.reindex" : "DEBUG"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
The easiest way to identify modules is not from the logs, which abbreviate the path (for example, `o.o.i.r`), but from the [OpenSearch source code](https://github.com/opensearch-project/opensearch/tree/master/server/src/main/java/org/opensearch).
|
||||
{: .tip }
|
||||
|
||||
After this sample change, OpenSearch emits much more detailed logs during reindex operations:
|
||||
|
||||
```
|
||||
[2019-10-18T16:52:51,184][DEBUG][o.o.i.r.TransportReindexAction] [node1] [1626]: starting
|
||||
[2019-10-18T16:52:51,186][DEBUG][o.o.i.r.TransportReindexAction] [node1] executing initial scroll against [some-index]
|
||||
[2019-10-18T16:52:51,291][DEBUG][o.o.i.r.TransportReindexAction] [node1] scroll returned [3] documents with a scroll id of [DXF1Z==]
|
||||
[2019-10-18T16:52:51,292][DEBUG][o.o.i.r.TransportReindexAction] [node1] [1626]: got scroll response with [3] hits
|
||||
[2019-10-18T16:52:51,294][DEBUG][o.o.i.r.WorkerBulkByScrollTaskState] [node1] [1626]: preparing bulk request for [0s]
|
||||
[2019-10-18T16:52:51,297][DEBUG][o.o.i.r.TransportReindexAction] [node1] [1626]: preparing bulk request
|
||||
[2019-10-18T16:52:51,299][DEBUG][o.o.i.r.TransportReindexAction] [node1] [1626]: sending [3] entry, [222b] bulk request
|
||||
[2019-10-18T16:52:51,310][INFO ][o.o.c.m.MetaDataMappingService] [node1] [some-new-index/R-j3adc6QTmEAEb-eAie9g] create_mapping [_doc]
|
||||
[2019-10-18T16:52:51,383][DEBUG][o.o.i.r.TransportReindexAction] [node1] [1626]: got scroll response with [0] hits
|
||||
[2019-10-18T16:52:51,384][DEBUG][o.o.i.r.WorkerBulkByScrollTaskState] [node1] [1626]: preparing bulk request for [0s]
|
||||
[2019-10-18T16:52:51,385][DEBUG][o.o.i.r.TransportReindexAction] [node1] [1626]: preparing bulk request
|
||||
[2019-10-18T16:52:51,386][DEBUG][o.o.i.r.TransportReindexAction] [node1] [1626]: finishing without any catastrophic failures
|
||||
[2019-10-18T16:52:51,395][DEBUG][o.o.i.r.TransportReindexAction] [node1] Freed [1] contexts
|
||||
```
|
||||
|
||||
The DEBUG and TRACE levels are extremely verbose. If you enable either one to troubleshoot a problem, disable it after you finish.
|
||||
|
||||
There are other ways to change log levels:
|
||||
|
||||
1. Add lines to `opensearch.yml`:
|
||||
|
||||
```yml
|
||||
logger.org.opensearch.index.reindex: debug
|
||||
```
|
||||
|
||||
Modifying `opensearch.yml` makes the most sense if you want to reuse your logging configuration across multiple clusters or debug startup issues with a single node.
|
||||
|
||||
2. Modify `log4j2.properties`:
|
||||
|
||||
```
|
||||
# Define a new logger with unique ID of reindex
|
||||
logger.reindex.name = org.opensearch.index.reindex
|
||||
# Set the log level for that ID
|
||||
logger.reindex.level = debug
|
||||
```
|
||||
|
||||
This approach is extremely flexible, but requires familiarity with the [Log4j 2 property file syntax](https://logging.apache.org/log4j/2.x/manual/configuration.html#Properties). In general, the other options offer a simpler configuration experience.
|
||||
|
||||
If you examine the default `log4j2.properties` file in the configuration directory, you can see a few OpenSearch-specific variables:
|
||||
|
||||
```
|
||||
appender.console.layout.pattern = [%d{ISO8601}][%-5p][%-25c{1.}] [%node_name]%marker %m%n
|
||||
appender.rolling_old.fileName = ${sys:os.logs.base_path}${sys:file.separator}${sys:os.logs.cluster_name}.log
|
||||
```
|
||||
|
||||
- `${sys:os.logs.base_path}` is the directory for logs (for example, `/var/log/opensearch/`).
|
||||
- `${sys:os.logs.cluster_name}` is the name of the cluster.
|
||||
- `[%node_name]` is the name of the node.
|
||||
|
||||
|
||||
## Slow logs
|
||||
|
||||
OpenSearch has two *slow logs*, logs that help you identify performance issues: the search slow log and the indexing slow log.
|
||||
|
||||
These logs rely on thresholds to define what qualifies as a "slow" search or indexing operation. For example, you might decide that a query is slow if it takes more than 15 seconds to complete. Unlike application logs, which you configure for modules, you configure slow logs for indices. By default, both logs are disabled (all thresholds are set to `-1`):
|
||||
|
||||
```json
|
||||
GET <some-index>/_settings?include_defaults=true
|
||||
|
||||
{
|
||||
"indexing": {
|
||||
"slowlog": {
|
||||
"reformat": "true",
|
||||
"threshold": {
|
||||
"index": {
|
||||
"warn": "-1",
|
||||
"trace": "-1",
|
||||
"debug": "-1",
|
||||
"info": "-1"
|
||||
}
|
||||
},
|
||||
"source": "1000",
|
||||
"level": "TRACE"
|
||||
}
|
||||
},
|
||||
"search": {
|
||||
"slowlog": {
|
||||
"level": "TRACE",
|
||||
"threshold": {
|
||||
"fetch": {
|
||||
"warn": "-1",
|
||||
"trace": "-1",
|
||||
"debug": "-1",
|
||||
"info": "-1"
|
||||
},
|
||||
"query": {
|
||||
"warn": "-1",
|
||||
"trace": "-1",
|
||||
"debug": "-1",
|
||||
"info": "-1"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
To enable these logs, increase one or more thresholds:
|
||||
|
||||
```json
|
||||
PUT <some-index>/_settings
|
||||
{
|
||||
"indexing": {
|
||||
"slowlog": {
|
||||
"threshold": {
|
||||
"index": {
|
||||
"warn": "15s",
|
||||
"trace": "750ms",
|
||||
"debug": "3s",
|
||||
"info": "10s"
|
||||
}
|
||||
},
|
||||
"source": "500",
|
||||
"level": "INFO"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
In this example, OpenSearch logs indexing operations that take 15 seconds or longer at the WARN level and operations that take between 10 and 14.*x* seconds at the INFO level. If you set a threshold to 0 seconds, OpenSearch logs all operations, which can be useful for testing whether slow logs are indeed enabled.
|
||||
|
||||
- `reformat` specifies whether to log the document `_source` field as a single line (`true`) or let it span multiple lines (`false`).
|
||||
- `source` is the number of characters of the document `_source` field to log.
|
||||
- `level` is the minimum log level to include.
|
||||
|
||||
A line from `opensearch_index_indexing_slowlog.log` might look like this:
|
||||
|
||||
```
|
||||
node1 | [2019-10-24T19:48:51,012][WARN][i.i.s.index] [node1] [some-index/i86iF5kyTyy-PS8zrdDeAA] took[3.4ms], took_millis[3], type[_doc], id[1], routing[], source[{"title":"Your Name", "Director":"Makoto Shinkai"}]
|
||||
```
|
||||
|
||||
Slow logs can consume considerable disk space if you set thresholds or levels too low. Consider enabling them temporarily for troubleshooting or performance tuning. To disable slow logs, return all thresholds to `-1`.
|
||||
|
||||
|
||||
## Deprecation logs
|
||||
|
||||
Deprecation logs record when clients make deprecated API calls to your cluster. These logs can help you identify and fix issues prior to upgrading to a new major version. By default, OpenSearch logs deprecated API calls at the WARN level, which works well for almost all use cases. If desired, configure `logger.deprecation.level` using `_cluster/settings`, `opensearch.yml`, or `log4j2.properties`.
|
|
@ -0,0 +1,635 @@
|
|||
---
|
||||
layout: default
|
||||
title: Metric aggregations
|
||||
parent: Aggregations
|
||||
nav_order: 1
|
||||
---
|
||||
|
||||
# Metric aggregations
|
||||
|
||||
Metric aggregations let you perform simple calculations such as finding the minimum, maximum, and average values of a field.
|
||||
|
||||
## Types of metric aggregations
|
||||
|
||||
Metric aggregations are of two types: single-value metric aggregations and multi-value metric aggregations.
|
||||
|
||||
### Single-value metric aggregations
|
||||
|
||||
Single-value metric aggregations return a single metric, for example, `sum`, `min`, `max`, `avg`, `cardinality`, and `value_count`.
|
||||
|
||||
### Multi-value metric aggregations
|
||||
|
||||
Multi-value metric aggregations return more than one metric, for example, `stats`, `extended_stats`, `matrix_stats`, `percentiles`, `percentile_ranks`, `geo_bounds`, `top_hits`, and `scripted_metric`.
|
||||
|
||||
## sum, min, max, avg
|
||||
|
||||
The `sum`, `min`, `max`, and `avg` metrics are single-value metric aggregations that return the sum, minimum, maximum, and average values of a field, respectively.
|
||||
|
||||
The following example calculates the total sum of the `taxful_total_price` field:
|
||||
|
||||
```json
|
||||
GET opensearch_dashboards_sample_data_ecommerce/_search
|
||||
{
|
||||
"size": 0,
|
||||
"aggs": {
|
||||
"sum_taxful_total_price": {
|
||||
"sum": {
|
||||
"field": "taxful_total_price"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
#### Sample response
|
||||
|
||||
```json
|
||||
...
|
||||
"aggregations" : {
|
||||
"sum_taxful_total_price" : {
|
||||
"value" : 350884.12890625
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
In a similar fashion, you can find the minimum, maximum, and average values of a field.
|
||||
|
||||
## cardinality
|
||||
|
||||
The `cardinality` metric is a single-value metric aggregation that counts the number of unique or distinct values of a field.
|
||||
|
||||
The following example finds the number of unique products in an eCommerce store:
|
||||
|
||||
```json
|
||||
GET opensearch_dashboards_sample_data_ecommerce/_search
|
||||
{
|
||||
"size": 0,
|
||||
"aggs": {
|
||||
"unique_products": {
|
||||
"cardinality": {
|
||||
"field": "products.product_id"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
#### Sample response
|
||||
|
||||
```json
|
||||
...
|
||||
"aggregations" : {
|
||||
"unique_products" : {
|
||||
"value" : 7033
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
The cardinality count is approximate.
|
||||
If you have tens of thousands of products in your store, an accurate cardinality calculation requires loading all the values into a hash set and returning its size. This approach doesn't scale well because it requires more memory and causes high latency.
|
||||
|
||||
You can control the trade-off between memory and accuracy with the `precision_threshold` setting. This setting defines the threshold below which counts are expected to be close to accurate. Above this value, counts might become a bit less accurate. The default value of `precision_threshold` is 3,000. The maximum supported value is 40,000.
|
||||
|
||||
```json
|
||||
GET opensearch_dashboards_sample_data_ecommerce/_search
|
||||
{
|
||||
"size": 0,
|
||||
"aggs": {
|
||||
"unique_products": {
|
||||
"cardinality": {
|
||||
"field": "products.product_id",
|
||||
"precision_threshold": 10000
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## value_count
|
||||
|
||||
The `value_count` metric is a single-value metric aggregation that calculates the number of values that an aggregation is based on.
|
||||
|
||||
For example, you can use the `value_count` metric with the `avg` metric to find how many numbers the aggregation uses to calculate an average value.
|
||||
|
||||
```json
|
||||
GET opensearch_dashboards_sample_data_ecommerce/_search
|
||||
{
|
||||
"size": 0,
|
||||
"aggs": {
|
||||
"number_of_values": {
|
||||
"value_count": {
|
||||
"field": "taxful_total_price"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
#### Sample response
|
||||
|
||||
```json
|
||||
...
|
||||
"aggregations" : {
|
||||
"number_of_values" : {
|
||||
"value" : 4675
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## stats, extended_stats, matrix_stats
|
||||
|
||||
The `stats` metric is a multi-value metric aggregation that returns all basic metrics such as `min`, `max`, `sum`, `avg`, and `value_count` in one aggregation query.
|
||||
|
||||
The following example returns the basic stats for the `taxful_total_price` field:
|
||||
|
||||
```json
|
||||
GET opensearch_dashboards_sample_data_ecommerce/_search
|
||||
{
|
||||
"size": 0,
|
||||
"aggs": {
|
||||
"stats_taxful_total_price": {
|
||||
"stats": {
|
||||
"field": "taxful_total_price"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
#### Sample response
|
||||
|
||||
```json
|
||||
...
|
||||
"aggregations" : {
|
||||
"stats_taxful_total_price" : {
|
||||
"count" : 4675,
|
||||
"min" : 6.98828125,
|
||||
"max" : 2250.0,
|
||||
"avg" : 75.05542864304813,
|
||||
"sum" : 350884.12890625
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
The `extended_stats` aggregation is an extended version of the `stats` aggregation. Apart from including basic stats, `extended_stats` also returns stats such as `sum_of_squares`, `variance`, and `std_deviation`.
|
||||
|
||||
```json
|
||||
GET opensearch_dashboards_sample_data_ecommerce/_search
|
||||
{
|
||||
"size": 0,
|
||||
"aggs": {
|
||||
"extended_stats_taxful_total_price": {
|
||||
"extended_stats": {
|
||||
"field": "taxful_total_price"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
#### Sample response
|
||||
|
||||
```json
|
||||
...
|
||||
"aggregations" : {
|
||||
"extended_stats_taxful_total_price" : {
|
||||
"count" : 4675,
|
||||
"min" : 6.98828125,
|
||||
"max" : 2250.0,
|
||||
"avg" : 75.05542864304813,
|
||||
"sum" : 350884.12890625,
|
||||
"sum_of_squares" : 3.9367749294174194E7,
|
||||
"variance" : 2787.59157113862,
|
||||
"variance_population" : 2787.59157113862,
|
||||
"variance_sampling" : 2788.187974983536,
|
||||
"std_deviation" : 52.79764740155209,
|
||||
"std_deviation_population" : 52.79764740155209,
|
||||
"std_deviation_sampling" : 52.80329511482722,
|
||||
"std_deviation_bounds" : {
|
||||
"upper" : 180.6507234461523,
|
||||
"lower" : -30.53986616005605,
|
||||
"upper_population" : 180.6507234461523,
|
||||
"lower_population" : -30.53986616005605,
|
||||
"upper_sampling" : 180.66201887270256,
|
||||
"lower_sampling" : -30.551161586606312
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
The `std_deviation_bounds` object contains the upper and lower bounds of an interval of plus/minus two standard deviations from the mean, which helps you visualize the variance of the data.
|
||||
To use a different number of standard deviations for the bounds, for example 3, set `sigma` to 3:
|
||||
|
||||
```json
|
||||
GET opensearch_dashboards_sample_data_ecommerce/_search
|
||||
{
|
||||
"size": 0,
|
||||
"aggs": {
|
||||
"extended_stats_taxful_total_price": {
|
||||
"extended_stats": {
|
||||
"field": "taxful_total_price",
|
||||
"sigma": 3
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
The `matrix_stats` aggregation generates advanced stats for multiple fields in a matrix form.
|
||||
The following example returns advanced stats in a matrix form for the `taxful_total_price` and `products.base_price` fields:
|
||||
|
||||
```json
|
||||
GET opensearch_dashboards_sample_data_ecommerce/_search
|
||||
{
|
||||
"size": 0,
|
||||
"aggs": {
|
||||
"matrix_stats_taxful_total_price": {
|
||||
"matrix_stats": {
|
||||
"fields": ["taxful_total_price", "products.base_price"]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
#### Sample response
|
||||
|
||||
```json
|
||||
...
|
||||
"aggregations" : {
|
||||
"matrix_stats_taxful_total_price" : {
|
||||
"doc_count" : 4675,
|
||||
"fields" : [
|
||||
{
|
||||
"name" : "products.base_price",
|
||||
"count" : 4675,
|
||||
"mean" : 34.994239430147196,
|
||||
"variance" : 360.5035285833703,
|
||||
"skewness" : 5.530161335032702,
|
||||
"kurtosis" : 131.16306324042148,
|
||||
"covariance" : {
|
||||
"products.base_price" : 360.5035285833703,
|
||||
"taxful_total_price" : 846.6489362233166
|
||||
},
|
||||
"correlation" : {
|
||||
"products.base_price" : 1.0,
|
||||
"taxful_total_price" : 0.8444765264325268
|
||||
}
|
||||
},
|
||||
{
|
||||
"name" : "taxful_total_price",
|
||||
"count" : 4675,
|
||||
"mean" : 75.05542864304839,
|
||||
"variance" : 2788.1879749835402,
|
||||
"skewness" : 15.812149139924037,
|
||||
"kurtosis" : 619.1235507385902,
|
||||
"covariance" : {
|
||||
"products.base_price" : 846.6489362233166,
|
||||
"taxful_total_price" : 2788.1879749835402
|
||||
},
|
||||
"correlation" : {
|
||||
"products.base_price" : 0.8444765264325268,
|
||||
"taxful_total_price" : 1.0
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Statistic | Description
|
||||
:--- | :---
|
||||
`count` | The number of samples measured.
|
||||
`mean` | The average value of the field measured from the sample.
|
||||
`variance` | How far the values of the field measured are spread out from its mean value. The larger the variance, the more it's spread from its mean value.
|
||||
`skewness` | A measure of the asymmetry of the distribution of the field's values around the mean.
|
||||
`kurtosis` | A measure of the tail heaviness of a distribution. As the tail becomes lighter, kurtosis decreases. As the tail becomes heavier, kurtosis increases. To learn about kurtosis, see [Wikipedia](https://en.wikipedia.org/wiki/Kurtosis).
|
||||
`covariance` | A measure of the joint variability between two fields. A positive value means that their values move in the same direction, and a negative value means that they move in opposite directions.
|
||||
`correlation` | A measure of the strength of the relationship between two fields. Valid values range from -1 to 1. A value of -1 means that the fields are negatively correlated, and a value of 1 means that they're positively correlated. A value of 0 means that there's no identifiable relationship between them.
|
||||
|
||||
## percentile, percentile_ranks
|
||||
|
||||
A percentile is the value at or below which a certain percentage of the data falls. For example, the 95th percentile is the value that is greater than or equal to 95% of the values.
|
||||
|
||||
The `percentiles` metric is a multi-value metric aggregation that lets you find outliers in your data or figure out the distribution of your data.
|
||||
|
||||
Like the `cardinality` metric, the `percentiles` metric is also approximate.
|
||||
|
||||
The following example calculates the percentile in relation to the `taxful_total_price` field:
|
||||
|
||||
```json
|
||||
GET opensearch_dashboards_sample_data_ecommerce/_search
|
||||
{
|
||||
"size": 0,
|
||||
"aggs": {
|
||||
"percentile_taxful_total_price": {
|
||||
"percentiles": {
|
||||
"field": "taxful_total_price"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
#### Sample response
|
||||
|
||||
```json
|
||||
...
|
||||
"aggregations" : {
|
||||
"percentile_taxful_total_price" : {
|
||||
"values" : {
|
||||
"1.0" : 21.984375,
|
||||
"5.0" : 27.984375,
|
||||
"25.0" : 44.96875,
|
||||
"50.0" : 64.22061688311689,
|
||||
"75.0" : 93.0,
|
||||
"95.0" : 156.0,
|
||||
"99.0" : 222.0
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Percentile rank is the percentage of values that are at or below a specified threshold value. For example, if a value is greater than or equal to 80% of the values, it has a percentile rank of 80.
|
||||
|
||||
```json
|
||||
GET opensearch_dashboards_sample_data_ecommerce/_search
|
||||
{
|
||||
"size": 0,
|
||||
"aggs": {
|
||||
"percentile_rank_taxful_total_price": {
|
||||
"percentile_ranks": {
|
||||
"field": "taxful_total_price",
|
||||
"values": [
|
||||
10,
|
||||
15
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
#### Sample response
|
||||
|
||||
```json
|
||||
...
|
||||
"aggregations" : {
|
||||
"percentile_rank_taxful_total_price" : {
|
||||
"values" : {
|
||||
"10.0" : 0.055096056411283456,
|
||||
"15.0" : 0.0830092961834656
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## geo_bound
|
||||
|
||||
The `geo_bounds` metric is a multi-value metric aggregation that calculates the bounding box in terms of latitude and longitude around a `geo_point` field.
|
||||
|
||||
The following example returns the `geo_bounds` metrics for the `geoip.location` field:
|
||||
|
||||
```json
|
||||
GET opensearch_dashboards_sample_data_ecommerce/_search
|
||||
{
|
||||
"size": 0,
|
||||
"aggs": {
|
||||
"geo": {
|
||||
"geo_bounds": {
|
||||
"field": "geoip.location"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
#### Sample response
|
||||
|
||||
```json
|
||||
"aggregations" : {
|
||||
"geo" : {
|
||||
"bounds" : {
|
||||
"top_left" : {
|
||||
"lat" : 52.49999997206032,
|
||||
"lon" : -118.20000001229346
|
||||
},
|
||||
"bottom_right" : {
|
||||
"lat" : 4.599999985657632,
|
||||
"lon" : 55.299999956041574
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## top_hits
|
||||
|
||||
The `top_hits` metric is a multi-value metric aggregation that ranks the matching documents based on a relevance score for the field that's being aggregated.
|
||||
|
||||
You can specify the following options:
|
||||
|
||||
- `from`: The starting position of the hit.
|
||||
- `size`: The maximum number of hits to return. The default value is 3.
|
||||
- `sort`: How the matching hits are sorted. By default, the hits are sorted by the relevance score of the aggregation query.
|
||||
|
||||
The following example returns the top 5 products in your eCommerce data:
|
||||
|
||||
```json
|
||||
GET opensearch_dashboards_sample_data_ecommerce/_search
|
||||
{
|
||||
"size": 0,
|
||||
"aggs": {
|
||||
"top_hits_products": {
|
||||
"top_hits": {
|
||||
"size": 5
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
#### Sample response
|
||||
|
||||
```json
|
||||
...
|
||||
"aggregations" : {
|
||||
"top_hits_products" : {
|
||||
"hits" : {
|
||||
"total" : {
|
||||
"value" : 4675,
|
||||
"relation" : "eq"
|
||||
},
|
||||
"max_score" : 1.0,
|
||||
"hits" : [
|
||||
{
|
||||
"_index" : "opensearch_dashboards_sample_data_ecommerce",
|
||||
"_type" : "_doc",
|
||||
"_id" : "glMlwXcBQVLeQPrkHPtI",
|
||||
"_score" : 1.0,
|
||||
"_source" : {
|
||||
"category" : [
|
||||
"Women's Accessories",
|
||||
"Women's Clothing"
|
||||
],
|
||||
"currency" : "EUR",
|
||||
"customer_first_name" : "rania",
|
||||
"customer_full_name" : "rania Evans",
|
||||
"customer_gender" : "FEMALE",
|
||||
"customer_id" : 24,
|
||||
"customer_last_name" : "Evans",
|
||||
"customer_phone" : "",
|
||||
"day_of_week" : "Sunday",
|
||||
"day_of_week_i" : 6,
|
||||
"email" : "rania@evans-family.zzz",
|
||||
"manufacturer" : [
|
||||
"Tigress Enterprises"
|
||||
],
|
||||
"order_date" : "2021-02-28T14:16:48+00:00",
|
||||
"order_id" : 583581,
|
||||
"products" : [
|
||||
{
|
||||
"base_price" : 10.99,
|
||||
"discount_percentage" : 0,
|
||||
"quantity" : 1,
|
||||
"manufacturer" : "Tigress Enterprises",
|
||||
"tax_amount" : 0,
|
||||
"product_id" : 19024,
|
||||
"category" : "Women's Accessories",
|
||||
"sku" : "ZO0082400824",
|
||||
"taxless_price" : 10.99,
|
||||
"unit_discount_amount" : 0,
|
||||
"min_price" : 5.17,
|
||||
"_id" : "sold_product_583581_19024",
|
||||
"discount_amount" : 0,
|
||||
"created_on" : "2016-12-25T14:16:48+00:00",
|
||||
"product_name" : "Snood - white/grey/peach",
|
||||
"price" : 10.99,
|
||||
"taxful_price" : 10.99,
|
||||
"base_unit_price" : 10.99
|
||||
},
|
||||
{
|
||||
"base_price" : 32.99,
|
||||
"discount_percentage" : 0,
|
||||
"quantity" : 1,
|
||||
"manufacturer" : "Tigress Enterprises",
|
||||
"tax_amount" : 0,
|
||||
"product_id" : 19260,
|
||||
"category" : "Women's Clothing",
|
||||
"sku" : "ZO0071900719",
|
||||
"taxless_price" : 32.99,
|
||||
"unit_discount_amount" : 0,
|
||||
"min_price" : 17.15,
|
||||
"_id" : "sold_product_583581_19260",
|
||||
"discount_amount" : 0,
|
||||
"created_on" : "2016-12-25T14:16:48+00:00",
|
||||
"product_name" : "Cardigan - grey",
|
||||
"price" : 32.99,
|
||||
"taxful_price" : 32.99,
|
||||
"base_unit_price" : 32.99
|
||||
}
|
||||
],
|
||||
"sku" : [
|
||||
"ZO0082400824",
|
||||
"ZO0071900719"
|
||||
],
|
||||
"taxful_total_price" : 43.98,
|
||||
"taxless_total_price" : 43.98,
|
||||
"total_quantity" : 2,
|
||||
"total_unique_products" : 2,
|
||||
"type" : "order",
|
||||
"user" : "rani",
|
||||
"geoip" : {
|
||||
"country_iso_code" : "EG",
|
||||
"location" : {
|
||||
"lon" : 31.3,
|
||||
"lat" : 30.1
|
||||
},
|
||||
"region_name" : "Cairo Governorate",
|
||||
"continent_name" : "Africa",
|
||||
"city_name" : "Cairo"
|
||||
},
|
||||
"event" : {
|
||||
"dataset" : "sample_ecommerce"
|
||||
}
|
||||
}
|
||||
...
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## scripted_metric
|
||||
|
||||
The `scripted_metric` metric is a multi-value metric aggregation that returns metrics calculated from a specified script.
|
||||
|
||||
A script has four stages: the initial stage, the map stage, the combine stage, and the reduce stage.
|
||||
|
||||
* `init_script`: (OPTIONAL) Sets the initial state and executes before any collection of documents.
|
||||
* `map_script`: Executes the aggregation on the collected documents. In the following example, it checks the value of the `response.keyword` field and increments the matching counter in the state.
|
||||
* `combine_script`: Aggregates the state returned from every shard. The aggregated value is returned to the coordinating node.
|
||||
* `reduce_script`: Provides access to the `states` variable, an array that contains the results of the `combine_script` from each shard.
|
||||
|
||||
The following example aggregates the different HTTP response types in web log data:
|
||||
|
||||
```json
|
||||
GET opensearch_dashboards_sample_data_logs/_search
|
||||
{
|
||||
"size": 0,
|
||||
"aggregations": {
|
||||
"responses.counts": {
|
||||
"scripted_metric": {
|
||||
"init_script": "state.responses = ['error':0L,'success':0L,'other':0L]",
|
||||
"map_script": """
|
||||
def code = doc['response.keyword'].value;
|
||||
if (code.startsWith('5') || code.startsWith('4')) {
|
||||
state.responses.error += 1 ;
|
||||
} else if(code.startsWith('2')) {
|
||||
state.responses.success += 1;
|
||||
} else {
|
||||
state.responses.other += 1;
|
||||
}
|
||||
""",
|
||||
"combine_script": "state.responses",
|
||||
"reduce_script": """
|
||||
def counts = ['error': 0L, 'success': 0L, 'other': 0L];
|
||||
for (responses in states) {
|
||||
counts.error += responses['error'];
|
||||
counts.success += responses['success'];
|
||||
counts.other += responses['other'];
|
||||
}
|
||||
return counts;
|
||||
"""
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
#### Sample Response
|
||||
|
||||
```json
|
||||
...
|
||||
"aggregations" : {
|
||||
"responses.counts" : {
|
||||
"value" : {
|
||||
"other" : 0,
|
||||
"success" : 12832,
|
||||
"error" : 1242
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,189 @@
|
|||
---
|
||||
layout: default
|
||||
title: Popular APIs
|
||||
nav_order: 96
|
||||
---
|
||||
|
||||
# Popular APIs
|
||||
|
||||
This page contains sample requests for popular OpenSearch APIs.
|
||||
|
||||
|
||||
---
|
||||
|
||||
#### Table of contents
|
||||
1. TOC
|
||||
{:toc}
|
||||
|
||||
|
||||
---
|
||||
|
||||
## Create index with non-default settings
|
||||
|
||||
```json
|
||||
PUT my-logs
|
||||
{
|
||||
"settings": {
|
||||
"number_of_shards": 4,
|
||||
"number_of_replicas": 2
|
||||
},
|
||||
"mappings": {
|
||||
"properties": {
|
||||
"title": {
|
||||
"type": "text"
|
||||
},
|
||||
"year": {
|
||||
"type": "integer"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
## Index a document with a random ID
|
||||
|
||||
```json
|
||||
POST my-logs/_doc
|
||||
{
|
||||
"title": "Your Name",
|
||||
"year": "2016"
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
## Index a document with a specific ID
|
||||
|
||||
```json
|
||||
PUT my-logs/_doc/1
|
||||
{
|
||||
"title": "Weathering with You",
|
||||
"year": "2019"
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
## Index several documents at once
|
||||
|
||||
The blank line at the end of the request body is required. If you omit the `_id` field, OpenSearch generates a random ID.
|
||||
|
||||
```json
|
||||
POST _bulk
|
||||
{ "index": { "_index": "my-logs", "_id": "2" } }
|
||||
{ "title": "The Garden of Words", "year": 2013 }
|
||||
{ "index" : { "_index": "my-logs", "_id" : "3" } }
|
||||
{ "title": "5 Centimeters Per Second", "year": 2007 }
|
||||
|
||||
```
|
||||
|
||||
|
||||
## List all indices
|
||||
|
||||
```
|
||||
GET _cat/indices?v
|
||||
```
|
||||
|
||||
|
||||
## Open or close all indices that match a pattern
|
||||
|
||||
```
|
||||
POST my-logs*/_open
|
||||
POST my-logs*/_close
|
||||
```
|
||||
|
||||
|
||||
## Delete all indices that match a pattern
|
||||
|
||||
```
|
||||
DELETE my-logs*
|
||||
```
|
||||
|
||||
|
||||
## Create an index alias
|
||||
|
||||
This request creates the alias `my-logs-today` for the index `my-logs-2019-11-13`.
|
||||
|
||||
```
|
||||
PUT my-logs-2019-11-13/_alias/my-logs-today
|
||||
```
|
||||
|
||||
|
||||
## List all aliases
|
||||
|
||||
```
|
||||
GET _cat/aliases?v
|
||||
```
|
||||
|
||||
|
||||
## Search an index or all indices that match a pattern
|
||||
|
||||
```
|
||||
GET my-logs/_search?q=test
|
||||
GET my-logs*/_search?q=test
|
||||
```
|
||||
|
||||
|
||||
## Get cluster settings, including defaults
|
||||
|
||||
```
|
||||
GET _cluster/settings?include_defaults=true
|
||||
```
|
||||
|
||||
|
||||
## Change disk watermarks (or other cluster settings)
|
||||
|
||||
```json
|
||||
PUT _cluster/settings
|
||||
{
|
||||
"transient": {
|
||||
"cluster.routing.allocation.disk.watermark.low": "80%",
|
||||
"cluster.routing.allocation.disk.watermark.high": "85%"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
## Get cluster health
|
||||
|
||||
```
|
||||
GET _cluster/health
|
||||
```
|
||||
|
||||
|
||||
## List nodes in the cluster
|
||||
|
||||
```
|
||||
GET _cat/nodes?v
|
||||
```
|
||||
|
||||
|
||||
## Get node statistics
|
||||
|
||||
```
|
||||
GET _nodes/stats
|
||||
```
|
||||
|
||||
|
||||
## Get snapshots in a repository
|
||||
|
||||
```
|
||||
GET _snapshot/my-repository/_all
|
||||
```
|
||||
|
||||
|
||||
## Take a snapshot
|
||||
|
||||
```
|
||||
PUT _snapshot/my-repository/my-snapshot
|
||||
```
|
||||
|
||||
|
||||
## Restore a snapshot
|
||||
|
||||
```json
|
||||
POST _snapshot/my-repository/my-snapshot/_restore
|
||||
{
|
||||
"indices": "-.opensearch_security",
|
||||
"include_global_state": false
|
||||
}
|
||||
```
|
|
@ -0,0 +1,290 @@
|
|||
---
|
||||
layout: default
|
||||
title: Boolean queries
|
||||
parent: Query DSL
|
||||
nav_order: 45
|
||||
---
|
||||
|
||||
# Boolean queries
|
||||
|
||||
The `bool` query lets you combine multiple search queries with boolean logic. You can use boolean logic between queries to either narrow or broaden your search results.
|
||||
|
||||
The `bool` query is a go-to query because it allows you to construct an advanced query by chaining together several simple ones.
|
||||
|
||||
Use the following clauses (subqueries) within the `bool` query:
|
||||
|
||||
Clause | Behavior
|
||||
:--- | :---
|
||||
`must` | The results must match the queries in this clause. If you have multiple queries, every single one must match. Acts as an `and` operator.
|
||||
`must_not` | This is the anti-must clause. All matches are excluded from the results. Acts as a `not` operator.
|
||||
`should` | The results should, but don't have to, match the queries. Each matching `should` clause increases the relevancy score. As an option, you can require a minimum number of these queries to match by setting the `minimum_should_match` parameter (default is 1).
|
||||
`filter` | Filters reduce your dataset before applying the queries. A query within a filter clause is a yes-no option, where if a document matches the query it's included in the results. Otherwise, it's not. Filter queries do not affect the relevancy score that the results are sorted by. The results of a filter query are generally cached so they tend to run faster. Use the filter query to filter the results based on exact matches, ranges, dates, numbers, and so on.
|
||||
|
||||
The structure of a `bool` query is as follows:
|
||||
|
||||
```json
|
||||
GET _search
|
||||
{
|
||||
"query": {
|
||||
"bool": {
|
||||
"must": [
|
||||
{}
|
||||
],
|
||||
"must_not": [
|
||||
{}
|
||||
],
|
||||
"should": [
|
||||
{}
|
||||
],
|
||||
"filter": {}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
For example, assume you have the complete works of Shakespeare indexed in an OpenSearch cluster. You want to construct a single query that meets the following requirements:
|
||||
|
||||
1. The `text_entry` field must contain the word `love` and should contain either `life` or `grace`.
|
||||
2. The `speaker` field must not contain `ROMEO`.
|
||||
3. Filter these results to the play `Romeo and Juliet` without affecting the relevancy score.
|
||||
|
||||
Use the following query:
|
||||
|
||||
```json
|
||||
GET shakespeare/_search
|
||||
{
|
||||
"query": {
|
||||
"bool": {
|
||||
"must": [
|
||||
{
|
||||
"match": {
|
||||
"text_entry": "love"
|
||||
}
|
||||
}
|
||||
],
|
||||
"should": [
|
||||
{
|
||||
"match": {
|
||||
"text_entry": "life"
|
||||
}
|
||||
},
|
||||
{
|
||||
"match": {
|
||||
"text_entry": "grace"
|
||||
}
|
||||
}
|
||||
],
|
||||
"minimum_should_match": 1,
|
||||
"must_not": [
|
||||
{
|
||||
"match": {
|
||||
"speaker": "ROMEO"
|
||||
}
|
||||
}
|
||||
],
|
||||
"filter": {
|
||||
"term": {
|
||||
"play_name": "Romeo and Juliet"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
#### Sample output
|
||||
|
||||
```json
|
||||
{
|
||||
"took": 12,
|
||||
"timed_out": false,
|
||||
"_shards": {
|
||||
"total": 4,
|
||||
"successful": 4,
|
||||
"skipped": 0,
|
||||
"failed": 0
|
||||
},
|
||||
"hits": {
|
||||
"total": {
|
||||
"value": 1,
|
||||
"relation": "eq"
|
||||
},
|
||||
"max_score": 11.356054,
|
||||
"hits": [
|
||||
{
|
||||
"_index": "shakespeare",
|
||||
"_type": "_doc",
|
||||
"_id": "88020",
|
||||
"_score": 11.356054,
|
||||
"_source": {
|
||||
"type": "line",
|
||||
"line_id": 88021,
|
||||
"play_name": "Romeo and Juliet",
|
||||
"speech_number": 19,
|
||||
"line_number": "4.5.61",
|
||||
"speaker": "PARIS",
|
||||
"text_entry": "O love! O life! not life, but love in death!"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
If you want to identify which of these clauses actually caused the matching results, name each query with the `_name` parameter.
|
||||
To add the `_name` parameter, change the value of the field in the `match` query to an object:
|
||||
|
||||
|
||||
```json
|
||||
GET shakespeare/_search
|
||||
{
|
||||
"query": {
|
||||
"bool": {
|
||||
"must": [
|
||||
{
|
||||
"match": {
|
||||
"text_entry": {
|
||||
"query": "love",
|
||||
"_name": "love-must"
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"should": [
|
||||
{
|
||||
"match": {
|
||||
"text_entry": {
|
||||
"query": "life",
|
||||
"_name": "life-should"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"match": {
|
||||
"text_entry": {
|
||||
"query": "grace",
|
||||
"_name": "grace-should"
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"minimum_should_match": 1,
|
||||
"must_not": [
|
||||
{
|
||||
"match": {
|
||||
"speaker": {
|
||||
"query": "ROMEO",
|
||||
"_name": "ROMEO-must-not"
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"filter": {
|
||||
"term": {
|
||||
"play_name": "Romeo and Juliet"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
OpenSearch returns a `matched_queries` array that lists the queries that matched these results:
|
||||
|
||||
```json
|
||||
"matched_queries": [
|
||||
"love-must",
|
||||
"life-should"
|
||||
]
|
||||
```
|
||||
|
||||
If you remove the queries not in this list, you will still see the exact same result.
|
||||
By examining which `should` clause matched, you can better understand the relevancy score of the results.
|
||||
|
||||
You can also construct complex boolean expressions by nesting `bool` queries.
|
||||
For example, to find a `text_entry` field that matches (`love` OR `hate`) AND (`life` OR `grace`) in the play `Romeo and Juliet`:
|
||||
|
||||
```json
|
||||
GET shakespeare/_search
|
||||
{
|
||||
"query": {
|
||||
"bool": {
|
||||
"must": [
|
||||
{
|
||||
"bool": {
|
||||
"should": [
|
||||
{
|
||||
"match": {
|
||||
"text_entry": "love"
|
||||
}
|
||||
},
|
||||
{
|
||||
"match": {
|
||||
"text_entry": "hate"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"bool": {
|
||||
"should": [
|
||||
{
|
||||
"match": {
|
||||
"text_entry": "life"
|
||||
}
|
||||
},
|
||||
{
|
||||
"match": {
|
||||
"text_entry": "grace"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
],
|
||||
"filter": {
|
||||
"term": {
|
||||
"play_name": "Romeo and Juliet"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
#### Sample output
|
||||
|
||||
```json
|
||||
{
|
||||
"took": 10,
|
||||
"timed_out": false,
|
||||
"_shards": {
|
||||
"total": 2,
|
||||
"successful": 2,
|
||||
"skipped": 0,
|
||||
"failed": 0
|
||||
},
|
||||
"hits": {
|
||||
"total": 1,
|
||||
"max_score": 11.37006,
|
||||
"hits": [
|
||||
{
|
||||
"_index": "shakespeare",
|
||||
"_type": "doc",
|
||||
"_id": "88020",
|
||||
"_score": 11.37006,
|
||||
"_source": {
|
||||
"type": "line",
|
||||
"line_id": 88021,
|
||||
"play_name": "Romeo and Juliet",
|
||||
"speech_number": 19,
|
||||
"line_number": "4.5.61",
|
||||
"speaker": "PARIS",
|
||||
"text_entry": "O love! O life! not life, but love in death!"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
```
|
|
@ -0,0 +1,435 @@
|
|||
---
|
||||
layout: default
|
||||
title: Full-text queries
|
||||
parent: Query DSL
|
||||
nav_order: 40
|
||||
---
|
||||
|
||||
# Full-text queries
|
||||
|
||||
This page lists all full-text query types and common options. Given the sheer number of options and subtle behaviors, the best method of ensuring useful search results is to test different queries against representative indices and verify the output.
|
||||
|
||||
|
||||
---
|
||||
|
||||
#### Table of contents
|
||||
1. TOC
|
||||
{:toc}
|
||||
|
||||
|
||||
---
|
||||
|
||||
## Match
|
||||
|
||||
Creates a [boolean query](https://lucene.apache.org/core/8_4_0/core/org/apache/lucene/search/BooleanQuery.html) that returns results if the search term is present in the field.
|
||||
|
||||
The most basic form of the query provides only a field (`title`) and a term (`wind`):
|
||||
|
||||
```json
|
||||
GET _search
|
||||
{
|
||||
"query": {
|
||||
"match": {
|
||||
"title": "wind"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
For an example that uses [curl](https://curl.haxx.se/), try:
|
||||
|
||||
```bash
|
||||
curl --insecure -XGET -u 'admin:admin' https://<host>:<port>/<index>/_search \
|
||||
-H "content-type: application/json" \
|
||||
-d '{
|
||||
"query": {
|
||||
"match": {
|
||||
"title": "wind"
|
||||
}
|
||||
}
|
||||
}'
|
||||
```
|
||||
|
||||
The query accepts the following options. For descriptions of each, see [Options](#options).
|
||||
|
||||
```json
|
||||
GET _search
|
||||
{
|
||||
"query": {
|
||||
"match": {
|
||||
"title": {
|
||||
"query": "wind",
|
||||
"fuzziness": "AUTO",
|
||||
"fuzzy_transpositions": true,
|
||||
"operator": "or",
|
||||
"minimum_should_match": 1,
|
||||
"analyzer": "standard",
|
||||
"zero_terms_query": "none",
|
||||
"lenient": false,
|
||||
"cutoff_frequency": 0.01,
|
||||
"prefix_length": 0,
|
||||
"max_expansions": 50,
|
||||
"boost": 1
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
## Multi match
|
||||
|
||||
Similar to [match](#match), but searches multiple fields.
|
||||
|
||||
The `^` lets you "boost" certain fields. Boosts are multipliers that weigh matches in one field more heavily than matches in other fields. In the following example, a match for "wind" in the title field influences `_score` four times as much as a match in the plot field. The result is that films like *The Wind Rises* and *Gone with the Wind* are near the top of the search results, and films like *Twister* and *Sharknado*, which presumably have "wind" in their plot summaries, are near the bottom.
|
||||
|
||||
```json
|
||||
GET _search
|
||||
{
|
||||
"query": {
|
||||
"multi_match": {
|
||||
"query": "wind",
|
||||
"fields": ["title^4", "plot"]
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
The query accepts the following options. For descriptions of each, see [Options](#options).
|
||||
|
||||
```json
|
||||
GET _search
|
||||
{
|
||||
"query": {
|
||||
"multi_match": {
|
||||
"query": "wind",
|
||||
"fields": ["title^4", "description"],
|
||||
"type": "most_fields",
|
||||
"operator": "and",
|
||||
"minimum_should_match": 3,
|
||||
"tie_breaker": 0.0,
|
||||
"analyzer": "standard",
|
||||
"boost": 1,
|
||||
"fuzziness": "AUTO",
|
||||
"fuzzy_transpositions": true,
|
||||
"lenient": false,
|
||||
"prefix_length": 0,
|
||||
"max_expansions": 50,
|
||||
"auto_generate_synonyms_phrase_query": true,
|
||||
"cutoff_frequency": 0.01,
|
||||
"zero_terms_query": "none"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
## Match boolean prefix
|
||||
|
||||
Similar to [match](#match), but creates a [prefix query](https://lucene.apache.org/core/8_4_0/core/org/apache/lucene/search/PrefixQuery.html) out of the last term in the query string.
|
||||
|
||||
```json
|
||||
GET _search
|
||||
{
|
||||
"query": {
|
||||
"match_bool_prefix": {
|
||||
"title": "rises wi"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
The query accepts the following options. For descriptions of each, see [Options](#options).
|
||||
|
||||
```json
|
||||
GET _search
|
||||
{
|
||||
"query": {
|
||||
"match_bool_prefix": {
|
||||
"title": {
|
||||
"query": "rises wi",
|
||||
"fuzziness": "AUTO",
|
||||
"fuzzy_transpositions": true,
|
||||
"max_expansions": 50,
|
||||
"prefix_length": 0,
|
||||
"operator": "or",
|
||||
"minimum_should_match": 2,
|
||||
"analyzer": "standard"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
## Match phrase
|
||||
|
||||
Creates a [phrase query](https://lucene.apache.org/core/8_4_0/core/org/apache/lucene/search/PhraseQuery.html) that matches a sequence of terms.
|
||||
|
||||
```json
|
||||
GET _search
|
||||
{
|
||||
"query": {
|
||||
"match_phrase": {
|
||||
"title": "the wind rises"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
The query accepts the following options. For descriptions of each, see [Options](#options).
|
||||
|
||||
```json
|
||||
GET _search
|
||||
{
|
||||
"query": {
|
||||
"match_phrase": {
|
||||
"title": {
|
||||
"query": "wind rises the",
|
||||
"slop": 3,
|
||||
"analyzer": "standard",
|
||||
"zero_terms_query": "none"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
## Match phrase prefix
|
||||
|
||||
Similar to [match phrase](#match-phrase), but creates a [prefix query](https://lucene.apache.org/core/8_4_0/core/org/apache/lucene/search/PrefixQuery.html) out of the last term in the query string.
|
||||
|
||||
```json
|
||||
GET _search
|
||||
{
|
||||
"query": {
|
||||
"match_phrase_prefix": {
|
||||
"title": "the wind ri"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
The query accepts the following options. For descriptions of each, see [Options](#options).
|
||||
|
||||
```json
|
||||
GET _search
|
||||
{
|
||||
"query": {
|
||||
"match_phrase_prefix": {
|
||||
"title": {
|
||||
"query": "the wind ri",
|
||||
"analyzer": "standard",
|
||||
"max_expansions": 50,
|
||||
"slop": 3
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
## Common terms
|
||||
|
||||
The common terms query separates the query string into high- and low-frequency terms based on number of occurrences on the shard. Low-frequency terms are weighed more heavily in the results, and high-frequency terms are considered only for documents that already matched one or more low-frequency terms. In that sense, you can think of this query as having a built-in, ever-changing list of stop words.
|
||||
|
||||
```json
|
||||
GET _search
|
||||
{
|
||||
"query": {
|
||||
"common": {
|
||||
"title": {
|
||||
"query": "the wind rises"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
The query accepts the following options. For descriptions of each, see [Options](#options).
|
||||
|
||||
```json
|
||||
GET _search
|
||||
{
|
||||
"query": {
|
||||
"common": {
|
||||
"title": {
|
||||
"query": "the wind rises",
|
||||
"cutoff_frequency": 0.002,
|
||||
"low_freq_operator": "or",
|
||||
"boost": 1,
|
||||
"analyzer": "standard",
|
||||
"minimum_should_match": {
|
||||
"low_freq" : 2,
|
||||
"high_freq" : 3
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
## Query string
|
||||
|
||||
The query string query splits text based on operators and analyzes each individually.
|
||||
|
||||
If you search using the HTTP request parameters (e.g. `_search?q=wind`), OpenSearch creates a query string query.
|
||||
{: .note }
|
||||
|
||||
```json
|
||||
GET _search
|
||||
{
|
||||
"query": {
|
||||
"query_string": {
|
||||
"query": "the wind AND (rises OR rising)"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
The query accepts the following options. For descriptions of each, see [Options](#options).
|
||||
|
||||
```json
|
||||
GET _search
|
||||
{
|
||||
"query": {
|
||||
"query_string": {
|
||||
"query": "the wind AND (rises OR rising)",
|
||||
"default_field": "title",
|
||||
"type": "best_fields",
|
||||
"fuzziness": "AUTO",
|
||||
"fuzzy_transpositions": true,
|
||||
"fuzzy_max_expansions": 50,
|
||||
"fuzzy_prefix_length": 0,
|
||||
"minimum_should_match": 1,
|
||||
"default_operator": "or",
|
||||
"analyzer": "standard",
|
||||
"lenient": false,
|
||||
"boost": 1,
|
||||
"allow_leading_wildcard": true,
|
||||
"enable_position_increments": true,
|
||||
"phrase_slop": 3,
|
||||
"max_determinized_states": 10000,
|
||||
"time_zone": "-08:00",
|
||||
"quote_field_suffix": "",
|
||||
"quote_analyzer": "standard",
|
||||
"analyze_wildcard": false,
|
||||
"auto_generate_synonyms_phrase_query": true
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
## Simple query string
|
||||
|
||||
The simple query string query is like the query string query, but it lets advanced users specify many arguments directly in the query string. The query discards any invalid portions of the query string.
|
||||
|
||||
```json
|
||||
GET _search
|
||||
{
|
||||
"query": {
|
||||
"simple_query_string": {
|
||||
"query": "\"rises wind the\"~4 | *ising~2",
|
||||
"fields": ["title"]
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Special character | Behavior
|
||||
:--- | :---
|
||||
`+` | Acts as the `and` operator.
|
||||
`|` | Acts as the `or` operator.
|
||||
`*` | Acts as a wildcard.
|
||||
`""` | Wraps several terms into a phrase.
|
||||
`()` | Wraps a clause for precedence.
|
||||
`~n` | When used after a term (e.g. `wnid~3`), sets `fuzziness`. When used after a phrase, sets `slop`. See [Options](#options).
|
||||
`-` | Negates the term.
|
||||
|
||||
The query accepts the following options. For descriptions of each, see [Options](#options).
|
||||
|
||||
```json
|
||||
GET _search
|
||||
{
|
||||
"query": {
|
||||
"simple_query_string": {
|
||||
"query": "\"rises wind the\"~4 | *ising~2",
|
||||
"fields": ["title"],
|
||||
"flags": "ALL",
|
||||
"fuzzy_transpositions": true,
|
||||
"fuzzy_max_expansions": 50,
|
||||
"fuzzy_prefix_length": 0,
|
||||
"minimum_should_match": 1,
|
||||
"default_operator": "or",
|
||||
"analyzer": "standard",
|
||||
"lenient": false,
|
||||
"quote_field_suffix": "",
|
||||
"analyze_wildcard": false,
|
||||
"auto_generate_synonyms_phrase_query": true
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
## Match all
|
||||
|
||||
Matches all documents. Can be useful for testing.
|
||||
|
||||
```json
|
||||
GET _search
|
||||
{
|
||||
"query": {
|
||||
"match_all": {}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
## Match none
|
||||
|
||||
Matches no documents. Rarely useful.
|
||||
|
||||
```json
|
||||
GET _search
|
||||
{
|
||||
"query": {
|
||||
"match_none": {}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
## Options
|
||||
|
||||
Option | Valid values | Description
|
||||
:--- | :--- | :---
|
||||
`allow_leading_wildcard` | Boolean | Whether `*` and `?` are allowed as the first character of a search term. The default is true.
|
||||
`analyze_wildcard` | Boolean | Whether OpenSearch should attempt to analyze wildcard terms. Some analyzers do a poor job at this task, so the default is false.
|
||||
`analyzer` | `standard, simple, whitespace, stop, keyword, pattern, <language>, fingerprint` | The analyzer you want to use for the query. Different analyzers have different character filters, tokenizers, and token filters. The `stop` analyzer, for example, removes stop words (e.g. "an," "but," "this") from the query string.
|
||||
`auto_generate_synonyms_phrase_query` | Boolean | A value of true (default) automatically generates [phrase queries](https://lucene.apache.org/core/8_4_0/core/org/apache/lucene/search/PhraseQuery.html) for multi-term synonyms. For example, if you have the synonym `"ba, batting average"` and search for "ba," OpenSearch searches for `ba OR "batting average"` (if this option is true) or `ba OR (batting AND average)` (if this option is false).
|
||||
`boost` | Floating-point | Boosts the clause by the given multiplier. Useful for weighing clauses in compound queries. The default is 1.0.
|
||||
`cutoff_frequency` | Between `0.0` and `1.0` or a positive integer | This value lets you define high and low frequency terms based on number of occurrences in the index. Numbers between 0 and 1 are treated as a percentage. For example, 0.10 is 10%. This value means that if a word occurs within the search field in more than 10% of the documents on the shard, OpenSearch considers the word "high frequency" and deemphasizes it when calculating search score.<br /><br />Because this setting is *per shard*, testing its impact on search results can be challenging unless a cluster has many documents.
|
||||
`enable_position_increments` | Boolean | When true, result queries are aware of position increments. This setting is useful when the removal of stop words leaves an unwanted "gap" between terms. The default is true.
|
||||
`fields` | String array | The list of fields to search (e.g. `"fields": ["title^4", "description"]`). If unspecified, defaults to the `index.query.default_field` setting, which defaults to `["*"]`.
|
||||
`flags` | String | A `|`-delimited string of [flags](#simple-query-string) to enable (e.g. `AND|OR|NOT`). The default is `ALL`.
|
||||
`fuzziness` | `AUTO`, `0`, or a positive integer | The number of character edits (insert, delete, substitute) that it takes to change one word to another when determining whether a term matched a value. For example, the distance between `wined` and `wind` is 1. The default, `AUTO`, chooses a value based on the length of each term and is a good choice for most use cases.
|
||||
`fuzzy_transpositions` | Boolean | Setting `fuzzy_transpositions` to true (default) adds swaps of adjacent characters to the insert, delete, and substitute operations of the `fuzziness` option. For example, the distance between `wind` and `wnid` is 1 if `fuzzy_transpositions` is true (swap "n" and "i") and 2 if it is false (delete "n", insert "n"). <br /><br />If `fuzzy_transpositions` is false, `rewind` and `wnid` have the same distance (2) from `wind`, despite the more human-centric opinion that `wnid` is an obvious typo. The default is a good choice for most use cases.
|
||||
`lenient` | Boolean | Setting `lenient` to true lets you ignore data type mismatches between the query and the document field. For example, a query string of "8.2" could match a field of type `float`. The default is false.
|
||||
`low_freq_operator` | `and, or` | The operator for low-frequency terms. The default is `or`. See [Common terms](#common-terms) queries and `operator` in this table.
|
||||
`max_determinized_states` | Positive integer | The maximum number of "[states](https://lucene.apache.org/core/8_4_0/core/org/apache/lucene/util/automaton/Operations.html#DEFAULT_MAX_DETERMINIZED_STATES)" (a measure of complexity) that Lucene can create for query strings that contain regular expressions (e.g. `"query": "/wind.+?/"`). Larger numbers allow for queries that use more memory. The default is 10,000.
|
||||
`max_expansions` | Positive integer | Fuzzy queries "expand to" a number of matching terms that are within the distance specified in `fuzziness`. Then OpenSearch tries to match those terms against its indices. `max_expansions` specifies the maximum number of terms that the fuzzy query expands to. The default is 50.
|
||||
`minimum_should_match` | Positive or negative integer, positive or negative percentage, combination | If the query string contains multiple search terms and you used the `or` operator, the number of terms that need to match for the document to be considered a match. For example, if `minimum_should_match` is 2, "wind often rising" does not match "The Wind Rises." If `minimum_should_match` is 1, it matches. This option also has `low_freq` and `high_freq` properties for [Common terms](#common-terms) queries.
|
||||
`operator` | `or, and` | If the query string contains multiple search terms, whether all terms need to match (`and`) or only one term needs to match (`or`) for a document to be considered a match.
|
||||
`phrase_slop` | `0` (default) or a positive integer | See `slop`.
|
||||
`prefix_length` | `0` (default) or a positive integer | The number of leading characters that are not considered in fuzziness.
|
||||
`quote_field_suffix` | String | This option lets you search different fields depending on whether terms are wrapped in quotes. For example, if `quote_field_suffix` is `".exact"` and you search for `"lightly"` (in quotes) in the `title` field, OpenSearch searches the `title.exact` field. This second field might use a different type (e.g. `keyword` rather than `text`) or a different analyzer. The default is null.
|
||||
`rewrite` | `constant_score, scoring_boolean, constant_score_boolean, top_terms_N, top_terms_boost_N, top_terms_blended_freqs_N` | Determines how OpenSearch rewrites and scores multi-term queries. The default is `constant_score`.
|
||||
`slop` | `0` (default) or a positive integer | Controls the degree to which words in a query can be misordered and still be considered a match. From the [Lucene documentation](https://lucene.apache.org/core/8_4_0/core/org/apache/lucene/search/PhraseQuery.html#getSlop--): "The number of other words permitted between words in query phrase. For example, to switch the order of two words requires two moves (the first move places the words atop one another), so to permit re-orderings of phrases, the slop must be at least two. A value of zero requires an exact match."
|
||||
`tie_breaker` | `0.0` (default) to `1.0` | Changes the way OpenSearch scores searches. For example, a `type` of `best_fields` typically uses the highest score from any one field. If you specify a `tie_breaker` value between 0.0 and 1.0, the score changes to highest score + `tie_breaker` * score for all other matching fields. If you specify a value of 1.0, OpenSearch adds together the scores for all matching fields (effectively defeating the purpose of `best_fields`).
|
||||
`time_zone` | UTC offset | The time zone to use (e.g. `-08:00`) if the query string contains a date range (e.g. `"query": "wind rises release_date[2012-01-01 TO 2014-01-01]"`). The default is `UTC`.
|
||||
`type` | `best_fields, most_fields, cross_fields, phrase, phrase_prefix` | Determines how OpenSearch executes the query and scores the results. The default is `best_fields`.
|
||||
`zero_terms_query` | `none, all` | If the analyzer removes all terms from a query string, whether to match no documents (default) or all documents. For example, the `stop` analyzer removes all terms from the string "an but this."
|
|
@ -0,0 +1,120 @@
|
|||
---
|
||||
layout: default
|
||||
title: Query DSL
|
||||
nav_order: 27
|
||||
has_children: true
|
||||
---
|
||||
|
||||
# Query DSL
|
||||
|
||||
While you can use HTTP request parameters to perform simple searches, you can also use the OpenSearch query domain-specific language (DSL), which provides a wider range of search options. The query DSL uses the HTTP request body, so you can more easily customize your queries to get the exact results that you want.
|
||||
|
||||
For example, the following request uses an HTTP request parameter to search for documents whose `speaker` field has a value of `queen`.
|
||||
|
||||
**Sample request**
|
||||
```json
|
||||
GET _search?q=speaker:queen
|
||||
```
|
||||
|
||||
**Sample response**
|
||||
```json
|
||||
{
|
||||
"took": 87,
|
||||
"timed_out": false,
|
||||
"_shards": {
|
||||
"total": 68,
|
||||
"successful": 68,
|
||||
"skipped": 0,
|
||||
"failed": 0
|
||||
},
|
||||
"hits": {
|
||||
"total": {
|
||||
"value": 4080,
|
||||
"relation": "eq"
|
||||
},
|
||||
"max_score": 4.4368687,
|
||||
"hits": [
|
||||
{
|
||||
"_index": "new_shakespeare",
|
||||
"_type": "_doc",
|
||||
"_id": "28559",
|
||||
"_score": 4.4368687,
|
||||
"_source": {
|
||||
"type": "line",
|
||||
"line_id": 28560,
|
||||
"play_name": "Cymbeline",
|
||||
"speech_number": 20,
|
||||
"line_number": "1.1.81",
|
||||
"speaker": "QUEEN",
|
||||
"text_entry": "No, be assured you shall not find me, daughter,"
|
||||
}
|
||||
}
]
}
}
|
||||
```
|
||||
|
||||
With query DSL, however, you can include an HTTP request body to look for results more tailored to your needs. The following example shows how to search for `speaker` and `text_entry` fields that have a value of `QUEEN`.
|
||||
|
||||
**Sample request**
|
||||
```json
|
||||
{
|
||||
"query": {
|
||||
"multi_match": {
|
||||
"query": "QUEEN",
|
||||
"fields": ["speaker", "text_entry"]
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**Sample response**
|
||||
```json
|
||||
{
|
||||
"took": 39,
|
||||
"timed_out": false,
|
||||
"_shards": {
|
||||
"total": 68,
|
||||
"successful": 68,
|
||||
"skipped": 0,
|
||||
"failed": 0
|
||||
},
|
||||
"hits": {
|
||||
"total": {
|
||||
"value": 5837,
|
||||
"relation": "eq"
|
||||
},
|
||||
"max_score": 7.8623476,
|
||||
"hits": [
|
||||
{
|
||||
"_index": "new_shakespeare",
|
||||
"_type": "_doc",
|
||||
"_id": "100763",
|
||||
"_score": 7.8623476,
|
||||
"_source": {
|
||||
"type": "line",
|
||||
"line_id": 100764,
|
||||
"play_name": "Troilus and Cressida",
|
||||
"speech_number": 43,
|
||||
"line_number": "3.1.68",
|
||||
"speaker": "PANDARUS",
|
||||
"text_entry": "Sweet queen, sweet queen! thats a sweet queen, i faith."
|
||||
}
|
||||
},
|
||||
{
|
||||
"_index": "shakespeare",
|
||||
"_type": "_doc",
|
||||
"_id": "28559",
|
||||
"_score": 5.8923807,
|
||||
"_source": {
|
||||
"type": "line",
|
||||
"line_id": 28560,
|
||||
"play_name": "Cymbeline",
|
||||
"speech_number": 20,
|
||||
"line_number": "1.1.81",
|
||||
"speaker": "QUEEN",
|
||||
"text_entry": "No, be assured you shall not find me, daughter,"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
```
|
||||
The OpenSearch query DSL comes in three varieties: term-level queries, full-text queries, and boolean queries. You can even perform more complicated searches by using different elements from each variety to find whatever data you need.
|
|
@ -0,0 +1,450 @@
|
|||
---
|
||||
layout: default
|
||||
title: Term-level queries
|
||||
parent: Query DSL
|
||||
nav_order: 30
|
||||
---
|
||||
|
||||
# Term-level queries
|
||||
|
||||
OpenSearch supports two types of queries when you search for data: term-level queries and full-text queries.
|
||||
|
||||
The following table describes the differences between them:
|
||||
|
||||
| | Term-level queries | Full-text queries
|
||||
:--- | :--- | :---
|
||||
*Description* | Term-level queries answer which documents match a query. | Full-text queries answer how well the documents match a query.
|
||||
*Analyzer* | The search term isn't analyzed. This means that the term query searches for your search term as it is. | The search term is analyzed by the same analyzer that was used for the specific field of the document at the time it was indexed. This means that your search term goes through the same analysis process that the document's field did.
|
||||
*Relevance* | Term-level queries simply return documents that match without sorting them based on the relevance score. They still calculate the relevance score, but this score is the same for all the documents that are returned. | Full-text queries calculate a relevance score for each match and sort the results by decreasing order of relevance.
|
||||
*Use Case* | Use term-level queries when you want to match exact values such as numbers, dates, tags, and so on, and don't need the matches to be sorted by relevance. | Use full-text queries to match text fields and sort by relevance after taking into account factors like casing and stemming variants.
|
||||
|
||||
OpenSearch uses a probabilistic ranking framework called Okapi BM25 to calculate relevance scores. To learn more about Okapi BM25, see [Wikipedia](https://en.wikipedia.org/wiki/Okapi_BM25).
|
||||
{: .note }
|
||||
|
||||
Assume that you have the complete works of Shakespeare indexed in an OpenSearch cluster. We use a term-level query to search for the phrase "To be, or not to be" in the `text_entry` field:
|
||||
|
||||
```json
|
||||
GET shakespeare/_search
|
||||
{
|
||||
"query": {
|
||||
"term": {
|
||||
"text_entry": "To be, or not to be"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
#### Sample response
|
||||
|
||||
```json
|
||||
{
|
||||
"took" : 3,
|
||||
"timed_out" : false,
|
||||
"_shards" : {
|
||||
"total" : 1,
|
||||
"successful" : 1,
|
||||
"skipped" : 0,
|
||||
"failed" : 0
|
||||
},
|
||||
"hits" : {
|
||||
"total" : {
|
||||
"value" : 0,
|
||||
"relation" : "eq"
|
||||
},
|
||||
"max_score" : null,
|
||||
"hits" : [ ]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
We don’t get back any matches (`hits`). This is because the term “To be, or not to be” is searched literally in the inverted index, where only the analyzed values of the text fields are stored. Term-level queries aren't suited for searching on analyzed text fields because they often yield unexpected results. When working with text data, use term-level queries only for fields mapped as keyword only.
|
||||
|
||||
Using a full-text query:
|
||||
|
||||
```json
|
||||
GET shakespeare/_search
|
||||
{
|
||||
"query": {
|
||||
"match": {
|
||||
"text_entry": "To be, or not to be"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
The search query “To be, or not to be” is analyzed and tokenized into an array of tokens just like the `text_entry` field of the documents. The full-text query performs an intersection of tokens between our search query and the `text_entry` fields for all the documents, and then sorts the results by relevance scores:
|
||||
|
||||
#### Sample response
|
||||
|
||||
```json
|
||||
{
|
||||
"took" : 19,
|
||||
"timed_out" : false,
|
||||
"_shards" : {
|
||||
"total" : 1,
|
||||
"successful" : 1,
|
||||
"skipped" : 0,
|
||||
"failed" : 0
|
||||
},
|
||||
"hits" : {
|
||||
"total" : {
|
||||
"value" : 10000,
|
||||
"relation" : "gte"
|
||||
},
|
||||
"max_score" : 17.419369,
|
||||
"hits" : [
|
||||
{
|
||||
"_index" : "shakespeare",
|
||||
"_type" : "_doc",
|
||||
"_id" : "34229",
|
||||
"_score" : 17.419369,
|
||||
"_source" : {
|
||||
"type" : "line",
|
||||
"line_id" : 34230,
|
||||
"play_name" : "Hamlet",
|
||||
"speech_number" : 19,
|
||||
"line_number" : "3.1.64",
|
||||
"speaker" : "HAMLET",
|
||||
"text_entry" : "To be, or not to be: that is the question:"
|
||||
}
|
||||
},
|
||||
{
|
||||
"_index" : "shakespeare",
|
||||
"_type" : "_doc",
|
||||
"_id" : "109930",
|
||||
"_score" : 14.883024,
|
||||
"_source" : {
|
||||
"type" : "line",
|
||||
"line_id" : 109931,
|
||||
"play_name" : "A Winters Tale",
|
||||
"speech_number" : 23,
|
||||
"line_number" : "4.4.153",
|
||||
"speaker" : "PERDITA",
|
||||
"text_entry" : "Not like a corse; or if, not to be buried,"
|
||||
}
|
||||
},
|
||||
{
|
||||
"_index" : "shakespeare",
|
||||
"_type" : "_doc",
|
||||
"_id" : "103117",
|
||||
"_score" : 14.782743,
|
||||
"_source" : {
|
||||
"type" : "line",
|
||||
"line_id" : 103118,
|
||||
"play_name" : "Twelfth Night",
|
||||
"speech_number" : 53,
|
||||
"line_number" : "1.3.95",
|
||||
"speaker" : "SIR ANDREW",
|
||||
"text_entry" : "will not be seen; or if she be, its four to one"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
...
|
||||
```
|
||||
|
||||
For a list of all full-text queries, see [Full-text queries](../full-text/).
|
||||
|
||||
If you want to query for an exact term like “HAMLET” in the speaker field and don't need the results to be sorted by relevance scores, a term-level query is more efficient:
|
||||
|
||||
```json
|
||||
GET shakespeare/_search
|
||||
{
|
||||
"query": {
|
||||
"term": {
|
||||
"speaker": "HAMLET"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
#### Sample response
|
||||
|
||||
```json
|
||||
{
|
||||
"took" : 5,
|
||||
"timed_out" : false,
|
||||
"_shards" : {
|
||||
"total" : 1,
|
||||
"successful" : 1,
|
||||
"skipped" : 0,
|
||||
"failed" : 0
|
||||
},
|
||||
"hits" : {
|
||||
"total" : {
|
||||
"value" : 1582,
|
||||
"relation" : "eq"
|
||||
},
|
||||
"max_score" : 4.2540946,
|
||||
"hits" : [
|
||||
{
|
||||
"_index" : "shakespeare",
|
||||
"_type" : "_doc",
|
||||
"_id" : "32700",
|
||||
"_score" : 4.2540946,
|
||||
"_source" : {
|
||||
"type" : "line",
|
||||
"line_id" : 32701,
|
||||
"play_name" : "Hamlet",
|
||||
"speech_number" : 9,
|
||||
"line_number" : "1.2.66",
|
||||
"speaker" : "HAMLET",
|
||||
"text_entry" : "[Aside] A little more than kin, and less than kind."
|
||||
}
|
||||
},
|
||||
{
|
||||
"_index" : "shakespeare",
|
||||
"_type" : "_doc",
|
||||
"_id" : "32702",
|
||||
"_score" : 4.2540946,
|
||||
"_source" : {
|
||||
"type" : "line",
|
||||
"line_id" : 32703,
|
||||
"play_name" : "Hamlet",
|
||||
"speech_number" : 11,
|
||||
"line_number" : "1.2.68",
|
||||
"speaker" : "HAMLET",
|
||||
"text_entry" : "Not so, my lord; I am too much i' the sun."
|
||||
}
|
||||
},
|
||||
{
|
||||
"_index" : "shakespeare",
|
||||
"_type" : "_doc",
|
||||
"_id" : "32709",
|
||||
"_score" : 4.2540946,
|
||||
"_source" : {
|
||||
"type" : "line",
|
||||
"line_id" : 32710,
|
||||
"play_name" : "Hamlet",
|
||||
"speech_number" : 13,
|
||||
"line_number" : "1.2.75",
|
||||
"speaker" : "HAMLET",
|
||||
"text_entry" : "Ay, madam, it is common."
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
...
|
||||
```
|
||||
|
||||
Term-level queries match exactly. So, if you search for “Hamlet”, you don’t get back any matches, because `speaker` is a keyword field and its value “HAMLET” is stored in OpenSearch literally and not in an analyzed form.
|
||||
The search query “HAMLET” is also searched literally. So, to get a match on this field, we need to enter the exact same characters.
|
||||
|
||||
---
|
||||
|
||||
## Term
|
||||
|
||||
Use the `term` query to search for an exact term in a field.
|
||||
|
||||
```json
|
||||
GET shakespeare/_search
|
||||
{
|
||||
"query": {
|
||||
"term": {
|
||||
"line_id": {
|
||||
"value": "61809"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Terms
|
||||
|
||||
Use the `terms` query to search for multiple terms in the same field.
|
||||
|
||||
```json
|
||||
GET shakespeare/_search
|
||||
{
|
||||
"query": {
|
||||
"terms": {
|
||||
"line_id": [
|
||||
"61809",
|
||||
"61810"
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
You get back documents that match any of the terms.
|
||||
|
||||
## IDs
|
||||
|
||||
Use the `ids` query to search for one or more document ID values.
|
||||
|
||||
```json
|
||||
GET shakespeare/_search
|
||||
{
|
||||
"query": {
|
||||
"ids": {
|
||||
"values": [
|
||||
34229,
|
||||
91296
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Range
|
||||
|
||||
Use the `range` query to search for a range of values in a field.
|
||||
|
||||
To search for documents where the `line_id` value is >= 10 and <= 20:
|
||||
|
||||
```json
|
||||
GET shakespeare/_search
|
||||
{
|
||||
"query": {
|
||||
"range": {
|
||||
"line_id": {
|
||||
"gte": 10,
|
||||
"lte": 20
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Parameter | Behavior
|
||||
:--- | :---
|
||||
`gte` | Greater than or equal to.
|
||||
`gt` | Greater than.
|
||||
`lte` | Less than or equal to.
|
||||
`lt` | Less than.
|
||||
|
||||
Assume that you have a `products` index and you want to find all the products that were added in the year 2019:
|
||||
|
||||
```json
|
||||
GET products/_search
|
||||
{
|
||||
"query": {
|
||||
"range": {
|
||||
"created": {
|
||||
"gte": "2019/01/01",
|
||||
"lte": "2019/12/31"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Specify relative dates by using basic math expressions.
|
||||
|
||||
To subtract 1 year and 1 day from the specified date:
|
||||
|
||||
```json
|
||||
GET products/_search
|
||||
{
|
||||
"query": {
|
||||
"range": {
|
||||
"created": {
|
||||
"gte": "2019/01/01||-1y-1d"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
The first date that we specify is the anchor date or the starting point for the date math. Add two trailing pipe symbols. You could then add one day (`+1d`) or subtract two weeks (`-2w`). This math expression is relative to the anchor date that you specify.
|
||||
|
||||
You could also round off dates by adding a forward slash to the date or time unit.
|
||||
|
||||
To find products added in the last year and rounded off by month:
|
||||
|
||||
```json
|
||||
GET products/_search
|
||||
{
|
||||
"query": {
|
||||
"range": {
|
||||
"created": {
|
||||
"gte": "now-1y/M"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
The keyword `now` refers to the current date and time.
|
||||
|
||||
## Prefix
|
||||
|
||||
Use the `prefix` query to search for terms that begin with a specific prefix.
|
||||
|
||||
```json
|
||||
GET shakespeare/_search
|
||||
{
|
||||
"query": {
|
||||
"prefix": {
|
||||
"speaker": "KING"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Exists
|
||||
|
||||
Use the `exists` query to search for documents that contain a specific field.
|
||||
|
||||
```json
|
||||
GET shakespeare/_search
|
||||
{
|
||||
"query": {
|
||||
"exists": {
|
||||
"field": "speaker"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Wildcards
|
||||
|
||||
Use wildcard queries to search for terms that match a wildcard pattern.
|
||||
|
||||
Feature | Behavior
|
||||
:--- | :---
|
||||
`*` | Matches zero or more characters.
|
||||
`?` | Matches any single character.
|
||||
|
||||
To search for terms that start with `H` and end with `Y`:
|
||||
|
||||
```json
|
||||
GET shakespeare/_search
|
||||
{
|
||||
"query": {
|
||||
"wildcard": {
|
||||
"speaker": {
|
||||
"value": "H*Y"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
If we change `*` to `?`, we get no matches, because `?` refers to a single character.
|
||||
|
||||
Wildcard queries tend to be slow because they need to iterate over a lot of terms. Avoid placing wildcard characters at the beginning of a query because it could be a very expensive operation in terms of both resources and time.
|
||||
|
||||
## Regex
|
||||
|
||||
Use the `regexp` query to search for terms that match a regular expression.
|
||||
|
||||
The regular expression in the following query matches `H`, followed by one or more uppercase or lowercase letters, followed by `mlet`:
|
||||
|
||||
```json
|
||||
GET shakespeare/_search
|
||||
{
|
||||
"query": {
|
||||
"regexp": {
|
||||
"play_name": "H[a-zA-Z]+mlet"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Regular expressions are applied to the terms in the field and not the entire value of the field.
|
||||
|
||||
The efficiency of your regular expression depends a lot on the patterns you write. Make sure that you write `regexp` queries with either a prefix or suffix to improve performance.
|
|
@ -0,0 +1,284 @@
|
|||
---
|
||||
layout: default
|
||||
title: Reindex data
|
||||
nav_order: 16
|
||||
---
|
||||
|
||||
# Reindex data
|
||||
|
||||
After creating an index, you might need to make an extensive change such as adding a new field to every document or combining multiple indices to form a new one. Rather than deleting your index, making the change offline, and then indexing your data all over again, you can use the `reindex` operation.
|
||||
|
||||
With the `reindex` operation, you can copy all or a subset of documents that you select through a query to another index. Reindex is a `POST` operation. In its most basic form, you specify a source index and a destination index.
|
||||
|
||||
Reindexing can be an expensive operation depending on the size of your source index. We recommend you disable replicas in your destination index by setting `number_of_replicas` to `0` and re-enable them once the reindex process is complete.
|
||||
{: .note }
|
||||
|
||||
---
|
||||
|
||||
#### Table of contents
|
||||
1. TOC
|
||||
{:toc}
|
||||
|
||||
|
||||
---
|
||||
|
||||
## Reindex all documents
|
||||
|
||||
You can copy all documents from one index to another.
|
||||
|
||||
You first need to create a destination index with your desired field mappings and settings or you can copy the ones from your source index:
|
||||
|
||||
```json
|
||||
PUT destination
|
||||
{
|
||||
"mappings":{
|
||||
"Add in your desired mappings"
|
||||
},
|
||||
"settings":{
|
||||
"Add in your desired settings"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
This `reindex` command copies all the documents from a source index to a destination index:
|
||||
|
||||
```json
|
||||
POST _reindex
|
||||
{
|
||||
"source":{
|
||||
"index":"source"
|
||||
},
|
||||
"dest":{
|
||||
"index":"destination"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
If the destination index is not already created, the `reindex` operation creates a new destination index with default configurations.
|
||||
|
||||
## Reindex from a remote cluster
|
||||
|
||||
You can copy documents from an index in a remote cluster. Use the `remote` option to specify the remote hostname and the required login credentials.
|
||||
|
||||
This command reaches out to a remote cluster, logs in with the username and password, and copies all the documents from the source index in that remote cluster to the destination index in your local cluster:
|
||||
|
||||
```json
|
||||
POST _reindex
|
||||
{
|
||||
"source":{
|
||||
"remote":{
|
||||
"host":"https://<REST_endpoint_of_remote_cluster>:9200",
|
||||
"username":"YOUR_USERNAME",
|
||||
"password":"YOUR_PASSWORD"
|
||||
}
|
||||
},
|
||||
"dest":{
|
||||
"index":"destination"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
You can specify the following options:
|
||||
|
||||
Options | Valid values | Description | Required
|
||||
:--- | :--- | :--- | :---
|
||||
`host` | String | The REST endpoint of the remote cluster. | Yes
|
||||
`username` | String | The username to log into the remote cluster. | No
|
||||
`password` | String | The password to log into the remote cluster. | No
|
||||
`socket_timeout` | Time Unit | The wait time for socket reads (default 30s). | No
|
||||
`connect_timeout` | Time Unit | The wait time for remote connection timeouts (default 30s). | No
|
||||
|
||||
|
||||
## Reindex a subset of documents
|
||||
|
||||
You can copy a specific set of documents that match a search query.
|
||||
|
||||
This command copies only a subset of documents matched by a query operation to the destination index:
|
||||
|
||||
```json
|
||||
POST _reindex
|
||||
{
|
||||
"source":{
|
||||
"index":"source",
|
||||
"query": {
|
||||
"match": {
|
||||
"field_name": "text"
|
||||
}
|
||||
}
|
||||
},
|
||||
"dest":{
|
||||
"index":"destination"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
For a list of all query operations, see [Full-text queries](../full-text/).
|
||||
|
||||
## Combine one or more indices
|
||||
|
||||
You can combine documents from one or more indices by adding the source indices as a list.
|
||||
|
||||
This command copies all documents from two source indices to one destination index:
|
||||
|
||||
```json
|
||||
POST _reindex
|
||||
{
|
||||
"source":{
|
||||
"index":[
|
||||
"source_1",
|
||||
"source_2"
|
||||
]
|
||||
},
|
||||
"dest":{
|
||||
"index":"destination"
|
||||
}
|
||||
}
|
||||
```
|
||||
Make sure the number of shards for your source and destination indices is the same.
|
||||
|
||||
## Reindex only unique documents
|
||||
|
||||
You can copy only documents missing from a destination index by setting the `op_type` option to `create`.
|
||||
In this case, if a document with the same ID already exists, the operation ignores the one from the source index.
|
||||
To ignore all version conflicts of documents, set the `conflicts` option to `proceed`.
|
||||
|
||||
```json
|
||||
POST _reindex
|
||||
{
|
||||
"conflicts":"proceed",
|
||||
"source":{
|
||||
"index":"source"
|
||||
},
|
||||
"dest":{
|
||||
"index":"destination",
|
||||
"op_type":"create"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Reindex sorted documents
|
||||
|
||||
You can copy certain documents after sorting specific fields in the document.
|
||||
|
||||
This command copies the last 10 documents based on the `timestamp` field:
|
||||
|
||||
```json
|
||||
POST _reindex
|
||||
{
|
||||
"size":10,
|
||||
"source":{
|
||||
"index":"source",
|
||||
"sort":{
|
||||
"timestamp":"desc"
|
||||
}
|
||||
},
|
||||
"dest":{
|
||||
"index":"destination"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Transform documents during reindexing
|
||||
|
||||
You can transform your data during the reindexing process using the `script` option.
|
||||
We recommend Painless for scripting in OpenSearch.
|
||||
|
||||
This command runs the source index through a Painless script that increments a `number` field inside an `account` object before copying it to the destination index:
|
||||
|
||||
```json
|
||||
POST _reindex
|
||||
{
|
||||
"source":{
|
||||
"index":"source"
|
||||
},
|
||||
"dest":{
|
||||
"index":"destination"
|
||||
},
|
||||
"script":{
|
||||
"lang":"painless",
|
||||
"source":"ctx._source.account.number++"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
You can also specify an ingest pipeline to transform your data during the reindexing process.
|
||||
|
||||
You would first have to create a pipeline with `processors` defined. You have a number of different `processors` available to use in your ingest pipeline.
|
||||
|
||||
Here's a sample ingest pipeline that defines a `split` processor that splits a `text` field based on a `space` separator and stores it in a new `word` field. The `script` processor is a Painless script that finds the length of the `word` field and stores it in a new `word_count` field. The `remove` processor removes the `test` field.
|
||||
|
||||
```json
|
||||
PUT _ingest/pipeline/pipeline-test
|
||||
{
|
||||
"description": "Splits the text field into a list. Computes the length of the 'word' field and stores it in a new 'word_count' field. Removes the 'test' field.",
|
||||
"processors": [
|
||||
{
|
||||
"split": {
|
||||
"field": "text",
|
||||
"separator": "\\s+",
|
||||
"target_field": "word"
|
||||
}
|
||||
},
|
||||
{
|
||||
"script": {
|
||||
"lang": "painless",
|
||||
"source": "ctx.word_count = ctx.word.length"
|
||||
}
|
||||
},
|
||||
{
|
||||
"remove": {
|
||||
"field": "test"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
After creating a pipeline, you can use the `reindex` operation:
|
||||
|
||||
```json
|
||||
POST _reindex
|
||||
{
|
||||
"source": {
|
||||
"index": "source"
|
||||
},
|
||||
"dest": {
|
||||
"index": "destination",
|
||||
"pipeline": "pipeline-test"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Update documents in the current index
|
||||
|
||||
To update the data in your current index itself without copying it to a different index, use the `update_by_query` operation.
|
||||
|
||||
The `update_by_query` operation is a `POST` operation that you can perform on a single index at a time.
|
||||
|
||||
```json
|
||||
POST <index_name>/_update_by_query
|
||||
```
|
||||
|
||||
If you run this command with no parameters, it increments the version number for all documents in the index.
|
||||
|
||||
## Source index options
|
||||
|
||||
You can specify the following options for your source index:
|
||||
|
||||
Option | Valid values | Description | Required
|
||||
:--- | :--- | :--- | :---
|
||||
`index` | String | The name of the source index. You can provide multiple source indices as a list. | Yes
|
||||
`max_docs` | Integer | The maximum number of documents to reindex. | No
|
||||
`query` | Object | The search query to use for the reindex operation. | No
|
||||
`size` | Integer | The number of documents to reindex. | No
|
||||
`slice` | String | Specify manual or automatic slicing to parallelize reindexing. | No
|
||||
`sort` | List | Sort specific fields in the document before reindexing. | No
|
||||
|
||||
## Destination index options
|
||||
|
||||
You can specify the following options for your destination index:
|
||||
|
||||
Option | Valid values | Description | Required
|
||||
:--- | :--- | :--- | :---
|
||||
`index` | String | The name of the destination index. | Yes
|
||||
`version_type` | Enum | The version type for the indexing operation. Valid values: internal, external, external_gt, external_gte. | No
|
|
@ -0,0 +1,169 @@
|
|||
---
|
||||
layout: default
|
||||
title: Bulk
|
||||
parent: REST API reference
|
||||
nav_order: 15
|
||||
---
|
||||
|
||||
# Bulk
|
||||
|
||||
The bulk operation lets you add, update, or delete many documents in a single request. Compared to individual OpenSearch indexing requests, the bulk operation has significant performance benefits. Whenever practical, we recommend batching indexing operations into bulk requests.
|
||||
|
||||
|
||||
## Example
|
||||
|
||||
```json
|
||||
POST _bulk
|
||||
{ "delete": { "_index": "movies", "_id": "tt2229499" } }
|
||||
{ "index": { "_index": "movies", "_id": "tt1979320" } }
|
||||
{ "title": "Rush", "year": 2013 }
|
||||
{ "create": { "_index": "movies", "_id": "tt1392214" } }
|
||||
{ "title": "Prisoners", "year": 2013 }
|
||||
{ "update": { "_index": "movies", "_id": "tt0816711" } }
|
||||
{ "doc" : { "title": "World War Z" } }
|
||||
|
||||
```
|
||||
|
||||
|
||||
## Path and HTTP methods
|
||||
|
||||
```
|
||||
POST _bulk
|
||||
POST {index}/_bulk
|
||||
```
|
||||
|
||||
Specifying the index in the path means you don't need to include it in the [request body](#request-body).
|
||||
|
||||
OpenSearch also accepts PUT requests to the `_bulk` path, but we highly recommend using POST. The accepted usage of PUT---adding or replacing a single resource at a given path---doesn't make sense for bulk requests.
|
||||
{: .note }
|
||||
|
||||
|
||||
## URL parameters
|
||||
|
||||
All bulk URL parameters are optional.
|
||||
|
||||
Parameter | Type | Description
|
||||
:--- | :--- | :---
|
||||
pipeline | String | The pipeline ID for preprocessing documents.
|
||||
refresh | Enum | Whether to refresh the affected shards after performing the indexing operations. Default is `false`. `true` makes the changes show up in search results immediately, but hurts cluster performance. `wait_for` waits for a refresh. Requests take longer to return, but cluster performance doesn't suffer.
|
||||
require_alias | Boolean | Set to `true` to require that all actions target an index alias rather than an index. Default is `false`.
|
||||
routing | String | Routes the request to the specified shard.
|
||||
timeout | Time | How long to wait for the request to return. Default `1m`.
|
||||
type | String | (Deprecated) The default document type for documents that don't specify a type. Default is `_doc`. We highly recommend ignoring this parameter and using a type of `_doc` for all indices.
|
||||
wait_for_active_shards | String | Specifies the number of active shards that must be available before OpenSearch processes the bulk request. Default is 1 (only the primary shard). Set to `all` or a positive integer. Values greater than 1 require replicas. For example, if you specify a value of 3, the index must have two replicas distributed across two additional nodes for the request to succeed.
|
||||
{% comment %}_source | List | asdf
|
||||
_source_excludes | list | asdf
|
||||
_source_includes | list | asdf{% endcomment %}
|
||||
|
||||
|
||||
## Request body
|
||||
|
||||
The bulk request body follows this pattern:
|
||||
|
||||
```
|
||||
Action and metadata\n
|
||||
Optional document\n
|
||||
Action and metadata\n
|
||||
Optional document\n
|
||||
|
||||
```
|
||||
|
||||
The optional JSON document doesn't need to be minified---spaces are fine---but it does need to be on a single line. OpenSearch uses newline characters to parse bulk requests and requires that the request body end with a newline character.
|
||||
|
||||
All actions support the same metadata: `_index`, `_id`, and `_require_alias`. If you don't provide an ID, OpenSearch generates one automatically, which can make it challenging to update the document at a later time.
|
||||
|
||||
- Create
|
||||
|
||||
Creates a document if it doesn't already exist and returns an error otherwise. The next line must include a JSON document.
|
||||
|
||||
```json
|
||||
{ "create": { "_index": "movies", "_id": "tt1392214" } }
|
||||
{ "title": "Prisoners", "year": 2013 }
|
||||
```
|
||||
|
||||
- Delete
|
||||
|
||||
This action deletes a document if it exists. If the document doesn't exist, OpenSearch doesn't return an error, but instead returns `not_found` under `result`. Delete actions don't require documents on the next line.
|
||||
|
||||
```json
|
||||
{ "delete": { "_index": "movies", "_id": "tt2229499" } }
|
||||
```
|
||||
|
||||
- Index
|
||||
|
||||
Index actions create a document if it doesn't yet exist and replace the document if it already exists. The next line must include a JSON document.
|
||||
|
||||
```json
|
||||
{ "index": { "_index": "movies", "_id": "tt1979320" } }
|
||||
{ "title": "Rush", "year": 2013}
|
||||
```
|
||||
|
||||
- Update
|
||||
|
||||
This action updates existing documents and returns an error if the document doesn't exist. The next line must include a full or partial JSON document, depending on how much of the document you want to update. It can also include a script or upsert for more complex document updates.
|
||||
|
||||
```json
|
||||
{ "update": { "_index": "movies", "_id": "tt0816711" } }
|
||||
{ "doc" : { "title": "World War Z" } }
|
||||
```
|
||||
|
||||
|
||||
## Response
|
||||
|
||||
In the response, pay particular attention to the top-level `errors` boolean. If true, you can iterate over the individual actions for more detailed information.
|
||||
|
||||
```json
|
||||
{
|
||||
"took": 11,
|
||||
"errors": true,
|
||||
"items": [
|
||||
{
|
||||
"index": {
|
||||
"_index": "movies",
|
||||
"_type": "_doc",
|
||||
"_id": "tt1979320",
|
||||
"_version": 1,
|
||||
"result": "created",
|
||||
"_shards": {
|
||||
"total": 2,
|
||||
"successful": 1,
|
||||
"failed": 0
|
||||
},
|
||||
"_seq_no": 1,
|
||||
"_primary_term": 1,
|
||||
"status": 201
|
||||
}
|
||||
},
|
||||
{
|
||||
"create": {
|
||||
"_index": "movies",
|
||||
"_type": "_doc",
|
||||
"_id": "tt1392214",
|
||||
"status": 409,
|
||||
"error": {
|
||||
"type": "version_conflict_engine_exception",
|
||||
"reason": "[tt1392214]: version conflict, document already exists (current version [1])",
|
||||
"index": "movies",
|
||||
"shard": "0",
|
||||
"index_uuid": "yhizhusbSWmP0G7OJnmcLg"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"update": {
|
||||
"_index": "movies",
|
||||
"_type": "_doc",
|
||||
"_id": "tt0816711",
|
||||
"status": 404,
|
||||
"error": {
|
||||
"type": "document_missing_exception",
|
||||
"reason": "[_doc][tt0816711]: document missing",
|
||||
"index": "movies",
|
||||
"shard": "0",
|
||||
"index_uuid": "yhizhusbSWmP0G7OJnmcLg"
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
|
@ -0,0 +1,144 @@
|
|||
---
|
||||
layout: default
|
||||
title: Cluster allocation explain
|
||||
parent: REST API reference
|
||||
nav_order: 30
|
||||
---
|
||||
|
||||
# Cluster allocation explain
|
||||
|
||||
The most basic cluster allocation explain request finds an unassigned shard and explains why it can't be allocated to a node.
|
||||
|
||||
If you add some options, you can instead get information on a specific shard, including why OpenSearch assigned it to its current node.
|
||||
|
||||
|
||||
## Example
|
||||
|
||||
```json
|
||||
GET /_cluster/allocation/explain?include_yes_decisions=true
|
||||
{
|
||||
"index": "movies",
|
||||
"shard": 0,
|
||||
"primary": true
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
## Path and HTTP methods
|
||||
|
||||
```
|
||||
GET _cluster/allocation/explain
|
||||
POST _cluster/allocation/explain
|
||||
```
|
||||
|
||||
|
||||
## URL parameters
|
||||
|
||||
All cluster allocation explain parameters are optional.
|
||||
|
||||
Parameter | Type | Description
|
||||
:--- | :--- | :---
|
||||
include_yes_decisions | Boolean | OpenSearch makes a series of yes or no decisions when trying to allocate a shard to a node. If this parameter is true, OpenSearch includes the (generally more numerous) "yes" decisions in its response. Default is false.
|
||||
include_disk_info | Boolean | Whether to include information about disk usage in the response. Default is false.
|
||||
|
||||
|
||||
## Request body
|
||||
|
||||
All cluster allocation explain fields are optional.
|
||||
|
||||
Field | Type | Description
|
||||
:--- | :--- | :---
|
||||
current_node | String | If you want an explanation only when the shard is on a particular node, specify that node's name here.
|
||||
index | String | The name of the shard's index.
|
||||
primary | Boolean | Whether to provide an explanation for the primary shard (true) or its first replica (false), which share the same shard ID.
|
||||
shard | Integer | The shard ID that you want an explanation for.
|
||||
|
||||
|
||||
## Response
|
||||
|
||||
```json
|
||||
{
|
||||
"index": "movies",
|
||||
"shard": 0,
|
||||
"primary": true,
|
||||
"current_state": "started",
|
||||
"current_node": {
|
||||
"id": "d8jRZcW1QmCBeVFlgOJx5A",
|
||||
"name": "opensearch-node1",
|
||||
"transport_address": "172.24.0.4:9300",
|
||||
"weight_ranking": 1
|
||||
},
|
||||
"can_remain_on_current_node": "yes",
|
||||
"can_rebalance_cluster": "yes",
|
||||
"can_rebalance_to_other_node": "no",
|
||||
"rebalance_explanation": "cannot rebalance as no target node exists that can both allocate this shard and improve the cluster balance",
|
||||
"node_allocation_decisions": [{
|
||||
"node_id": "vRxi4uPcRt2BtHlFoyCyTQ",
|
||||
"node_name": "opensearch-node2",
|
||||
"transport_address": "172.24.0.3:9300",
|
||||
"node_decision": "no",
|
||||
"weight_ranking": 1,
|
||||
"deciders": [{
|
||||
"decider": "max_retry",
|
||||
"decision": "YES",
|
||||
"explanation": "shard has no previous failures"
|
||||
},
|
||||
{
|
||||
"decider": "replica_after_primary_active",
|
||||
"decision": "YES",
|
||||
"explanation": "shard is primary and can be allocated"
|
||||
},
|
||||
{
|
||||
"decider": "enable",
|
||||
"decision": "YES",
|
||||
"explanation": "all allocations are allowed"
|
||||
},
|
||||
{
|
||||
"decider": "node_version",
|
||||
"decision": "YES",
|
||||
"explanation": "can relocate primary shard from a node with version [1.0.0] to a node with equal-or-newer version [1.0.0]"
|
||||
},
|
||||
{
|
||||
"decider": "snapshot_in_progress",
|
||||
"decision": "YES",
|
||||
"explanation": "no snapshots are currently running"
|
||||
},
|
||||
{
|
||||
"decider": "restore_in_progress",
|
||||
"decision": "YES",
|
||||
"explanation": "ignored as shard is not being recovered from a snapshot"
|
||||
},
|
||||
{
|
||||
"decider": "filter",
|
||||
"decision": "YES",
|
||||
"explanation": "node passes include/exclude/require filters"
|
||||
},
|
||||
{
|
||||
"decider": "same_shard",
|
||||
"decision": "NO",
|
||||
"explanation": "a copy of this shard is already allocated to this node [[movies][0], node[vRxi4uPcRt2BtHlFoyCyTQ], [R], s[STARTED], a[id=x8w7QxWdQQa188HKGn0iMQ]]"
|
||||
},
|
||||
{
|
||||
"decider": "disk_threshold",
|
||||
"decision": "YES",
|
||||
"explanation": "enough disk for shard on node, free: [35.9gb], shard size: [15.1kb], free after allocating shard: [35.9gb]"
|
||||
},
|
||||
{
|
||||
"decider": "throttling",
|
||||
"decision": "YES",
|
||||
"explanation": "below shard recovery limit of outgoing: [0 < 2] incoming: [0 < 2]"
|
||||
},
|
||||
{
|
||||
"decider": "shards_limit",
|
||||
"decision": "YES",
|
||||
"explanation": "total shard limits are disabled: [index: -1, cluster: -1] <= 0"
|
||||
},
|
||||
{
|
||||
"decider": "awareness",
|
||||
"decision": "YES",
|
||||
"explanation": "allocation awareness is not enabled, set cluster setting [cluster.routing.allocation.awareness.attributes] to enable it"
|
||||
}
|
||||
]
|
||||
}]
|
||||
}
|
||||
```
|
|
@ -0,0 +1,10 @@
|
|||
---
|
||||
layout: default
|
||||
title: REST API reference
|
||||
nav_order: 99
|
||||
has_children: true
|
||||
---
|
||||
|
||||
# REST API reference
|
||||
|
||||
OpenSearch uses its REST API for most operations. This _incomplete_ section includes REST API paths, HTTP verbs, supported parameters, request body details, and sample responses.
|
|
@ -0,0 +1,415 @@
|
|||
---
|
||||
layout: default
|
||||
title: Search templates
|
||||
nav_order: 50
|
||||
---
|
||||
|
||||
# Search templates
|
||||
|
||||
You can convert your full-text queries into a search template to accept user input and dynamically insert it into your query.
|
||||
|
||||
For example, if you use OpenSearch as a backend search engine for your application or website, you can take in user queries from a search bar or a form field and pass them as parameters into a search template. That way, the syntax to create OpenSearch queries is abstracted from your end users.
|
||||
|
||||
When you're writing code to convert user input into OpenSearch queries, you can simplify your code with search templates. If you need to add fields to your search query, you can just modify the template without making changes to your code.
|
||||
|
||||
Search templates use the Mustache language. For a list of all syntax options, see the [Mustache manual](http://mustache.github.io/mustache.5.html).
|
||||
{: .note }
|
||||
|
||||
## Create search templates
|
||||
|
||||
A search template has two components: the query and the parameters. Parameters are user-inputted values that get placed into variables. Variables are represented with double braces in Mustache notation. When encountering a variable like `{% raw %}{{var}}{% endraw %}` in the query, OpenSearch goes to the `params` section, looks for a parameter called `var`, and replaces it with the specified value.
|
||||
|
||||
You can code your application to ask your user what they want to search for and then plug that value into the `params` object at runtime.
|
||||
|
||||
This command defines a search template to find a play by its name. The `{% raw %}{{play_name}}{% endraw %}` in the query is replaced by the value `Henry IV`:
|
||||
|
||||
```json
|
||||
GET _search/template
|
||||
{
|
||||
"source": {
|
||||
"query": {
|
||||
"match": {
|
||||
"play_name": "{% raw %}{{play_name}}{% endraw %}"
|
||||
}
|
||||
}
|
||||
},
|
||||
"params": {
|
||||
"play_name": "Henry IV"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
This template runs the search on your entire cluster.
|
||||
To run this search on a specific index, add the index name to the request:
|
||||
|
||||
```json
|
||||
GET shakespeare/_search/template
|
||||
```
|
||||
|
||||
Specify the `from` and `size` parameters:
|
||||
|
||||
```json
|
||||
GET _search/template
|
||||
{
|
||||
"source": {
|
||||
"from": "{% raw %}{{from}}{% endraw %}",
|
||||
"size": "{% raw %}{{size}}{% endraw %}",
|
||||
"query": {
|
||||
"match": {
|
||||
"play_name": "{% raw %}{{play_name}}{% endraw %}"
|
||||
}
|
||||
}
|
||||
},
|
||||
"params": {
|
||||
"play_name": "Henry IV",
|
||||
"from": 10,
|
||||
"size": 10
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
To improve the search experience, you can define defaults so the user doesn’t have to specify every possible parameter. If the parameter is not defined in the `params` section, OpenSearch uses the default value.
|
||||
|
||||
The syntax for defining the default value for a variable `var` is as follows:
|
||||
|
||||
```json
|
||||
{% raw %}{{var}}{{^var}}default value{{/var}}{% endraw %}
|
||||
```
|
||||
|
||||
This command sets the defaults for `from` as 10 and `size` as 10:
|
||||
|
||||
```json
|
||||
GET _search/template
|
||||
{
|
||||
"source": {
|
||||
"from": "{% raw %}{{from}}{{^from}}10{{/from}}{% endraw %}",
|
||||
"size": "{% raw %}{{size}}{{^size}}10{{/size}}{% endraw %}",
|
||||
"query": {
|
||||
"match": {
|
||||
"play_name": "{% raw %}{{play_name}}{% endraw %}"
|
||||
}
|
||||
}
|
||||
},
|
||||
"params": {
|
||||
"play_name": "Henry IV"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
## Save and execute search templates
|
||||
|
||||
After the search template works the way you want it to, you can save the source of that template as a script, making it reusable for different input parameters.
|
||||
|
||||
When saving the search template as a script, you need to specify the `lang` parameter as `mustache`:
|
||||
|
||||
```json
|
||||
POST _scripts/play_search_template
|
||||
{
|
||||
"script": {
|
||||
"lang": "mustache",
|
||||
"source": {
|
||||
"from": "{% raw %}{{from}}{{^from}}0{{/from}}{% endraw %}",
|
||||
"size": "{% raw %}{{size}}{{^size}}10{{/size}}{% endraw %}",
|
||||
"query": {
|
||||
"match": {
|
||||
"play_name": "{% raw %}{{play_name}}{% endraw %}"
|
||||
}
|
||||
}
|
||||
},
|
||||
"params": {
|
||||
"play_name": "Henry IV"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Now you can reuse the template by referring to its `id` parameter.
|
||||
You can reuse this source template for different input values.
|
||||
|
||||
```json
|
||||
GET _search/template
|
||||
{
|
||||
"id": "play_search_template",
|
||||
"params": {
|
||||
"play_name": "Henry IV",
|
||||
"from": 0,
|
||||
"size": 1
|
||||
}
|
||||
}
|
||||
```
|
||||
#### Sample output
|
||||
|
||||
```json
|
||||
{
|
||||
"took": 7,
|
||||
"timed_out": false,
|
||||
"_shards": {
|
||||
"total": 6,
|
||||
"successful": 6,
|
||||
"skipped": 0,
|
||||
"failed": 0
|
||||
},
|
||||
"hits": {
|
||||
"total": {
|
||||
"value": 3205,
|
||||
"relation": "eq"
|
||||
},
|
||||
"max_score": 3.641852,
|
||||
"hits": [
|
||||
{
|
||||
"_index": "shakespeare",
|
||||
"_type": "_doc",
|
||||
"_id": "4",
|
||||
"_score": 3.641852,
|
||||
"_source": {
|
||||
"type": "line",
|
||||
"line_id": 5,
|
||||
"play_name": "Henry IV",
|
||||
"speech_number": 1,
|
||||
"line_number": "1.1.2",
|
||||
"speaker": "KING HENRY IV",
|
||||
"text_entry": "Find we a time for frighted peace to pant,"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
If you have a stored template and want to validate it, use the `render` operation:
|
||||
|
||||
```json
|
||||
POST _render/template
|
||||
{
|
||||
"id": "play_search_template",
|
||||
"params": {
|
||||
"play_name": "Henry IV"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
#### Sample output
|
||||
|
||||
```json
|
||||
{
|
||||
"template_output": {
|
||||
"from": "0",
|
||||
"size": "10",
|
||||
"query": {
|
||||
"match": {
|
||||
"play_name": "Henry IV"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Advanced parameter conversion with search templates
|
||||
|
||||
You have a lot of different syntax options in Mustache to transpose the input parameters into a query.
|
||||
You can specify conditions, run loops, join arrays, convert arrays to JSON, and so on.
|
||||
|
||||
### Conditions
|
||||
|
||||
Use the section tag in Mustache to represent conditions:
|
||||
|
||||
```json
|
||||
{% raw %}{{#var}}var{{/var}}{% endraw %}
|
||||
```
|
||||
|
||||
When `var` is a boolean value, this syntax acts as an `if` condition. The `{% raw %}{{#var}}{% endraw %}` and `{% raw %}{{/var}}{% endraw %}` tags insert the values placed between them only if `var` evaluates to `true`.
|
||||
|
||||
Using section tags would make your JSON invalid, so you must write your query in a string format instead.
|
||||
|
||||
This command includes the `size` parameter in the query only when the `limit` parameter is set to `true`.
|
||||
In the following example, the `limit` parameter is `true`, so the `size` parameter is activated. As a result, you would get back only two documents.
|
||||
|
||||
```json
|
||||
GET _search/template
|
||||
{
|
||||
"source": "{% raw %}{ {{#limit}} \"size\": \"{{size}}\", {{/limit}} \"query\":{\"match\":{\"play_name\": \"{{play_name}}\"}}}{% endraw %}",
|
||||
"params": {
|
||||
"play_name": "Henry IV",
|
||||
"limit": true,
|
||||
"size": 2
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
You can also design an `if-else` condition.
|
||||
This command sets `size` to `2` if `limit` is `true`. Otherwise, it sets `size` to `10`.
|
||||
|
||||
```json
|
||||
GET _search/template
|
||||
{
|
||||
"source": "{% raw %}{ {{#limit}} \"size\": \"2\", {{/limit}} {{^limit}} \"size\": \"10\", {{/limit}} \"query\":{\"match\":{\"play_name\": \"{{play_name}}\"}}}{% endraw %}",
|
||||
"params": {
|
||||
"play_name": "Henry IV",
|
||||
"limit": true
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Loops
|
||||
|
||||
You can also use the section tag to implement a foreach loop:
|
||||
|
||||
```
|
||||
{% raw %}{{#var}}{{.}}{{/var}}{% endraw %}
|
||||
```
|
||||
|
||||
When `var` is an array, the search template iterates through it and creates a `terms` query.
|
||||
|
||||
```json
|
||||
GET _search/template
|
||||
{
|
||||
"source": "{% raw %}{\"query\":{\"terms\":{\"play_name\":[\"{{#play_name}}\",\"{{.}}\",\"{{/play_name}}\"]}}}{% endraw %}",
|
||||
"params": {
|
||||
"play_name": [
|
||||
"Henry IV",
|
||||
"Othello"
|
||||
]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
This template is rendered as:
|
||||
|
||||
```json
|
||||
GET _search/template
|
||||
{
|
||||
"source": {
|
||||
"query": {
|
||||
"terms": {
|
||||
"play_name": [
|
||||
"Henry IV",
|
||||
"Othello"
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Join
|
||||
|
||||
You can use the `join` tag to concatenate values of an array (separated by commas):
|
||||
|
||||
```json
|
||||
GET _search/template
|
||||
{
|
||||
"source": {
|
||||
"query": {
|
||||
"match": {
|
||||
"text_entry": "{% raw %}{{#join}}{{text_entry}}{{/join}}{% endraw %}"
|
||||
}
|
||||
}
|
||||
},
|
||||
"params": {
|
||||
"text_entry": [
|
||||
"To be",
|
||||
"or not to be"
|
||||
]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Renders as:
|
||||
|
||||
```json
|
||||
GET _search/template
|
||||
{
|
||||
"source": {
|
||||
"query": {
|
||||
"match": {
|
||||
"text_entry": "{0=To be, 1=or not to be}"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Convert to JSON
|
||||
|
||||
You can use the `toJson` tag to convert parameters to their JSON representation:
|
||||
|
||||
```json
|
||||
GET _search/template
|
||||
{
|
||||
"source": "{\"query\":{\"bool\":{\"must\":[{\"terms\": {\"text_entries\": {% raw %}{{#toJson}}text_entries{{/toJson}}{% endraw %} }}] }}}",
|
||||
"params": {
|
||||
"text_entries": [
|
||||
{ "term": { "text_entry" : "love" } },
|
||||
{ "term": { "text_entry" : "soldier" } }
|
||||
]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Renders as:
|
||||
|
||||
```json
|
||||
GET _search/template
|
||||
{
|
||||
"source": {
|
||||
"query": {
|
||||
"bool": {
|
||||
"must": [
|
||||
{
|
||||
"terms": {
|
||||
"text_entries": [
|
||||
{
|
||||
"term": {
|
||||
"text_entry": "love"
|
||||
}
|
||||
},
|
||||
{
|
||||
"term": {
|
||||
"text_entry": "soldier"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Multiple search templates
|
||||
|
||||
You can bundle multiple search templates and send them to your OpenSearch cluster in a single request using the `msearch` operation.
|
||||
This saves network round trip time, so you get back the response more quickly as compared to independent requests.
|
||||
|
||||
```json
|
||||
GET _msearch/template
|
||||
{"index":"shakespeare"}
|
||||
{"id":"if_search_template","params":{"play_name":"Henry IV","limit":false,"size":2}}
|
||||
{"index":"shakespeare"}
|
||||
{"id":"play_search_template","params":{"play_name":"Henry IV"}}
|
||||
```
|
||||
|
||||
## Manage search templates
|
||||
|
||||
To list all scripts, run the following command:
|
||||
|
||||
```json
|
||||
GET _cluster/state/metadata?pretty&filter_path=**.stored_scripts
|
||||
```
|
||||
|
||||
To retrieve a specific search template, run the following command:
|
||||
|
||||
```json
|
||||
GET _scripts/<name_of_search_template>
|
||||
```
|
||||
|
||||
To delete a search template, run the following command:
|
||||
|
||||
```json
|
||||
DELETE _scripts/<name_of_search_template>
|
||||
```
|
||||
|
||||
---
|
|
@ -0,0 +1,381 @@
|
|||
---
|
||||
layout: default
|
||||
title: Take and restore snapshots
|
||||
nav_order: 65
|
||||
---
|
||||
|
||||
# Take and restore snapshots
|
||||
|
||||
Snapshots are backups of a cluster's indices and state. State includes cluster settings, node information, index settings, and shard allocation.
|
||||
|
||||
Snapshots have two main uses:
|
||||
|
||||
- **Recovering from failure**
|
||||
|
||||
For example, if cluster health goes red, you might restore the red indices from a snapshot.
|
||||
|
||||
- **Migrating from one cluster to another**
|
||||
|
||||
For example, if you're moving from a proof-of-concept to a production cluster, you might take a snapshot of the former and restore it on the latter.
|
||||
|
||||
|
||||
---
|
||||
|
||||
#### Table of contents
|
||||
1. TOC
|
||||
{:toc}
|
||||
|
||||
|
||||
---
|
||||
|
||||
## About snapshots
|
||||
|
||||
Snapshots aren't instantaneous. They take time to complete and do not represent perfect point-in-time views of the cluster. While a snapshot is in progress, you can still index documents and make other requests to the cluster, but new documents and updates to existing documents generally aren't included in the snapshot. The snapshot includes primary shards as they existed when OpenSearch initiated the snapshot. Depending on the size of your snapshot thread pool, different shards might be included in the snapshot at slightly different times.
|
||||
|
||||
OpenSearch snapshots are incremental, meaning that they only store data that has changed since the last successful snapshot. The difference in disk usage between frequent and infrequent snapshots is often minimal.
|
||||
|
||||
In other words, taking hourly snapshots for a week (for a total of 168 snapshots) might not use much more disk space than taking a single snapshot at the end of the week. Also, the more frequently you take snapshots, the less time they take to complete. Some OpenSearch users take snapshots as often as every half hour.
|
||||
|
||||
If you need to delete a snapshot, be sure to use the OpenSearch API rather than navigating to the storage location and purging files. Incremental snapshots from a cluster often share a lot of the same data; when you use the API, OpenSearch only removes data that no other snapshot is using.
|
||||
{: .tip }
|
||||
|
||||
|
||||
## Register repository
|
||||
|
||||
Before you can take a snapshot, you have to "register" a snapshot repository. A snapshot repository is just a storage location: a shared file system, Amazon S3, Hadoop Distributed File System (HDFS), Azure Storage, etc.
|
||||
|
||||
|
||||
### Shared file system
|
||||
|
||||
1. To use a shared file system as a snapshot repository, add it to `opensearch.yml`:
|
||||
|
||||
```yml
|
||||
path.repo: ["/mnt/snapshots"]
|
||||
```
|
||||
|
||||
On the RPM and Debian installs, you can then mount the file system. If you're using the Docker install, add the file system to each node in `docker-compose.yml` before starting the cluster:
|
||||
|
||||
```yml
|
||||
volumes:
|
||||
- /Users/jdoe/snapshots:/mnt/snapshots
|
||||
```
|
||||
|
||||
1. Then register the repository using the REST API:
|
||||
|
||||
```json
|
||||
PUT _snapshot/my-fs-repository
|
||||
{
|
||||
"type": "fs",
|
||||
"settings": {
|
||||
"location": "/mnt/snapshots"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
If the request is successful, the response from OpenSearch is minimal:
|
||||
|
||||
```json
|
||||
{
|
||||
"acknowledged": true
|
||||
}
|
||||
```
|
||||
|
||||
You probably only need to specify `location`, but the following table summarizes the options:
|
||||
|
||||
Setting | Description
|
||||
:--- | :---
|
||||
`location` | The shared file system for snapshots. Required.
|
||||
`chunk_size` | Breaks large files into chunks during snapshot operations (e.g. `64mb`, `1gb`), which is important for cloud storage providers and far less important for shared file systems. Default is `null` (unlimited). Optional.
|
||||
`compress` | Whether to compress metadata files. This setting does not affect data files, which might already be compressed, depending on your index settings. Default is `false`. Optional.
|
||||
`max_restore_bytes_per_sec` | The maximum rate at which snapshots restore. Default is 40 MB per second (`40m`). Optional.
|
||||
`max_snapshot_bytes_per_sec` | The maximum rate at which snapshots are taken. Default is 40 MB per second (`40m`). Optional.
|
||||
`readonly` | Whether the repository is read-only. Useful when migrating from one cluster (`"readonly": false` when registering) to another cluster (`"readonly": true` when registering). Optional.
|
||||
|
||||
|
||||
### Amazon S3
|
||||
|
||||
1. To use an Amazon S3 bucket as a snapshot repository, install the `repository-s3` plugin on all nodes:
|
||||
|
||||
```bash
|
||||
sudo ./bin/opensearch-plugin install repository-s3
|
||||
```
|
||||
|
||||
If you're using the Docker installation, see [Customize the Docker image](../../install/docker/#customize-the-docker-image). Your `Dockerfile` should look something like this:
|
||||
|
||||
```
|
||||
FROM opensearchproject/opensearch:{{site.opensearch_version}}
|
||||
|
||||
ENV AWS_ACCESS_KEY_ID <access-key>
|
||||
ENV AWS_SECRET_ACCESS_KEY <secret-key>
|
||||
|
||||
# Optional
|
||||
ENV AWS_SESSION_TOKEN <optional-session-token>
|
||||
|
||||
RUN /usr/share/opensearch/bin/opensearch-plugin install --batch repository-s3
|
||||
RUN /usr/share/opensearch/bin/opensearch-keystore create
|
||||
|
||||
RUN echo $AWS_ACCESS_KEY_ID | /usr/share/opensearch/bin/opensearch-keystore add --stdin s3.client.default.access_key
|
||||
RUN echo $AWS_SECRET_ACCESS_KEY | /usr/share/opensearch/bin/opensearch-keystore add --stdin s3.client.default.secret_key
|
||||
|
||||
# Optional
|
||||
RUN echo $AWS_SESSION_TOKEN | /usr/share/opensearch/bin/opensearch-keystore add --stdin s3.client.default.session_token
|
||||
```
|
||||
|
||||
After the Docker cluster starts, skip to step 7.
|
||||
|
||||
1. Add your AWS access and secret keys to the OpenSearch keystore:
|
||||
|
||||
```bash
|
||||
sudo ./bin/opensearch-keystore add s3.client.default.access_key
|
||||
sudo ./bin/opensearch-keystore add s3.client.default.secret_key
|
||||
```
|
||||
|
||||
1. (Optional) If you're using temporary credentials, add your session token:
|
||||
|
||||
```bash
|
||||
sudo ./bin/opensearch-keystore add s3.client.default.session_token
|
||||
```
|
||||
|
||||
1. (Optional) If you connect to the internet through a proxy, add those credentials:
|
||||
|
||||
```bash
|
||||
sudo ./bin/opensearch-keystore add s3.client.default.proxy.username
|
||||
sudo ./bin/opensearch-keystore add s3.client.default.proxy.password
|
||||
```
|
||||
|
||||
1. (Optional) Add other settings to `opensearch.yml`:
|
||||
|
||||
```yml
|
||||
s3.client.default.disable_chunked_encoding: false # Disables chunked encoding for compatibility with some storage services, but you probably don't need to change this value.
|
||||
s3.client.default.endpoint: s3.amazonaws.com # S3 has alternate endpoints, but you probably don't need to change this value.
|
||||
s3.client.default.max_retries: 3 # number of retries if a request fails
|
||||
s3.client.default.path_style_access: false # whether to use the deprecated path-style bucket URLs.
|
||||
# You probably don't need to change this value, but for more information, see https://docs.aws.amazon.com/AmazonS3/latest/dev/VirtualHosting.html#path-style-access.
|
||||
s3.client.default.protocol: https # http or https
|
||||
s3.client.default.proxy.host: my-proxy-host # the hostname for your proxy server
|
||||
s3.client.default.proxy.port: 8080 # port for your proxy server
|
||||
s3.client.default.read_timeout: 50s # the S3 connection timeout
|
||||
s3.client.default.use_throttle_retries: true # whether the client should wait a progressively longer amount of time (exponential backoff) between each successive retry
|
||||
```
|
||||
|
||||
1. If you changed `opensearch.yml`, you must restart each node in the cluster. Otherwise, you only need to reload secure cluster settings:
|
||||
|
||||
```
|
||||
POST _nodes/reload_secure_settings
|
||||
```
|
||||
|
||||
1. Create an S3 bucket if you don't already have one. To take snapshots, you need permissions to access the bucket. The following IAM policy is an example of those permissions:
|
||||
|
||||
```json
|
||||
{
|
||||
"Version": "2012-10-17",
|
||||
"Statement": [{
|
||||
"Action": [
|
||||
"s3:*"
|
||||
],
|
||||
"Effect": "Allow",
|
||||
"Resource": [
|
||||
"arn:aws:s3:::your-bucket",
|
||||
"arn:aws:s3:::your-bucket/*"
|
||||
]
|
||||
}]
|
||||
}
|
||||
```
|
||||
|
||||
1. Register the repository using the REST API:
|
||||
|
||||
```json
|
||||
PUT _snapshot/my-s3-repository
|
||||
{
|
||||
"type": "s3",
|
||||
"settings": {
|
||||
"bucket": "my-s3-bucket",
|
||||
"base_path": "my/snapshot/directory"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
You probably don't need to specify anything but `bucket` and `base_path`, but the following table summarizes the options:
|
||||
|
||||
Setting | Description
|
||||
:--- | :---
|
||||
`base_path` | The path within the bucket where you want to store snapshots (e.g. `my/snapshot/directory`). Optional. If not specified, snapshots are stored in the bucket root.
|
||||
`bucket` | Name of the S3 bucket. Required.
|
||||
`buffer_size` | The threshold beyond which chunks (of `chunk_size`) should be broken into pieces (of `buffer_size`) and sent to S3 using a different API. Default is the smaller of two values: 100 MB or 5% of the Java heap. Valid values are between `5mb` and `5gb`. We don't recommend changing this option.
|
||||
`canned_acl` | S3 has several [canned ACLs](https://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl) that the `repository-s3` plugin can add to objects as it creates them in S3. Default is `private`. Optional.
|
||||
`chunk_size` | Breaks files into chunks during snapshot operations (e.g. `64mb`, `1gb`), which is important for cloud storage providers and far less important for shared file systems. Default is `1gb`. Optional.
|
||||
`client` | When specifying client settings (e.g. `s3.client.default.access_key`), you can use a string other than `default` (e.g. `s3.client.backup-role.access_key`). If you used an alternate name, change this value to match. Default and recommended value is `default`. Optional.
|
||||
`compress` | Whether to compress metadata files. This setting does not affect data files, which might already be compressed, depending on your index settings. Default is `false`. Optional.
|
||||
`max_restore_bytes_per_sec` | The maximum rate at which snapshots restore. Default is 40 MB per second (`40m`). Optional.
|
||||
`max_snapshot_bytes_per_sec` | The maximum rate at which snapshots are taken. Default is 40 MB per second (`40m`). Optional.
|
||||
`readonly` | Whether the repository is read-only. Useful when migrating from one cluster (`"readonly": false` when registering) to another cluster (`"readonly": true` when registering). Optional.
|
||||
`server_side_encryption` | Whether to encrypt snapshot files in the S3 bucket. This setting uses AES-256 with S3-managed keys. See [Protecting data using server-side encryption](https://docs.aws.amazon.com/AmazonS3/latest/dev/serv-side-encryption.html). Default is false. Optional.
|
||||
`storage_class` | Specifies the [S3 storage class](https://docs.aws.amazon.com/AmazonS3/latest/dev/storage-class-intro.html) for the snapshot files. Default is `standard`. Do not use the `glacier` and `deep_archive` storage classes. Optional.
|
||||
|
||||
|
||||
## Take snapshots
|
||||
|
||||
You specify two pieces of information when you create a snapshot:
|
||||
|
||||
- Name of your snapshot repository
|
||||
- Name for the snapshot
|
||||
|
||||
The following snapshot includes all indices and the cluster state:
|
||||
|
||||
```json
|
||||
PUT _snapshot/my-repository/1
|
||||
```
|
||||
|
||||
You can also add a request body to include or exclude certain indices or specify other settings:
|
||||
|
||||
```json
|
||||
PUT _snapshot/my-repository/2
|
||||
{
|
||||
"indices": "opensearch-dashboards*,my-index*,-my-index-2016",
|
||||
"ignore_unavailable": true,
|
||||
"include_global_state": false,
|
||||
"partial": false
|
||||
}
|
||||
```
|
||||
|
||||
Setting | Description
|
||||
:--- | :---
|
||||
`indices` | The indices you want to include in the snapshot. You can use `,` to create a list of indices, `*` to specify an index pattern, and `-` to exclude certain indices. Don't put spaces between items. Default is all indices.
|
||||
`ignore_unavailable` | If an index from the `indices` list doesn't exist, whether to ignore it rather than fail the snapshot. Default is false.
|
||||
`include_global_state` | Whether to include cluster state in the snapshot. Default is true.
|
||||
`partial` | Whether to allow partial snapshots. Default is false, which fails the entire snapshot if one or more shards fail to store.
|
||||
|
||||
If you request the snapshot immediately after taking it, you might see something like this:
|
||||
|
||||
```json
|
||||
GET _snapshot/my-repository/2
|
||||
{
|
||||
"snapshots": [{
|
||||
"snapshot": "2",
|
||||
"version": "6.5.4",
|
||||
"indices": [
|
||||
"opensearch_dashboards_sample_data_ecommerce",
|
||||
"my-index",
|
||||
"opensearch_dashboards_sample_data_logs",
|
||||
"opensearch_dashboards_sample_data_flights"
|
||||
],
|
||||
"include_global_state": true,
|
||||
"state": "IN_PROGRESS",
|
||||
...
|
||||
}]
|
||||
}
|
||||
```
|
||||
|
||||
Note that the snapshot is still in progress. If you want to wait for the snapshot to finish before continuing, add the `wait_for_completion` parameter to your request. Snapshots can take a while to complete, so consider whether or not this option fits your use case:
|
||||
|
||||
```
|
||||
PUT _snapshot/my-repository/3?wait_for_completion=true
|
||||
```
|
||||
|
||||
Snapshots have the following states:
|
||||
|
||||
State | Description
|
||||
:--- | :---
|
||||
SUCCESS | The snapshot successfully stored all shards.
|
||||
IN_PROGRESS | The snapshot is currently running.
|
||||
PARTIAL | At least one shard failed to store successfully. Can only occur if you set `partial` to `true` when taking the snapshot.
|
||||
FAILED | The snapshot encountered an error and stored no data.
|
||||
INCOMPATIBLE | The snapshot is incompatible with the version of OpenSearch running on this cluster. See [Conflicts and compatibility](#conflicts-and-compatibility).
|
||||
|
||||
You can't take a snapshot if one is currently in progress. To check the status:
|
||||
|
||||
```
|
||||
GET _snapshot/_status
|
||||
```
|
||||
|
||||
|
||||
## Restore snapshots
|
||||
|
||||
The first step in restoring a snapshot is retrieving existing snapshots. To see all snapshot repositories:
|
||||
|
||||
```
|
||||
GET _snapshot/_all
|
||||
```
|
||||
|
||||
To see all snapshots in a repository:
|
||||
|
||||
```
|
||||
GET _snapshot/my-repository/_all
|
||||
```
|
||||
|
||||
Then restore a snapshot:
|
||||
|
||||
```
|
||||
POST _snapshot/my-repository/2/_restore
|
||||
```
|
||||
|
||||
Just like when taking a snapshot, you can add a request body to include or exclude certain indices or specify some other settings:
|
||||
|
||||
```json
|
||||
POST _snapshot/my-repository/2/_restore
|
||||
{
|
||||
"indices": "opensearch-dashboards*,my-index*",
|
||||
"ignore_unavailable": true,
|
||||
"include_global_state": false,
|
||||
"include_aliases": false,
|
||||
"partial": false,
|
||||
"rename_pattern": "opensearch-dashboards(.+)",
|
||||
"rename_replacement": "restored-opensearch-dashboards$1",
|
||||
"index_settings": {
|
||||
"index.blocks.read_only": false
|
||||
},
|
||||
"ignore_index_settings": [
|
||||
"index.refresh_interval"
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
Setting | Description
|
||||
:--- | :---
|
||||
`indices` | The indices you want to restore. You can use `,` to create a list of indices, `*` to specify an index pattern, and `-` to exclude certain indices. Don't put spaces between items. Default is all indices.
|
||||
`ignore_unavailable` | If an index from the `indices` list doesn't exist, whether to ignore it rather than fail the restore operation. Default is false.
|
||||
`include_global_state` | Whether to restore the cluster state. Default is false.
|
||||
`include_aliases` | Whether to restore aliases alongside their associated indices. Default is true.
|
||||
`partial` | Whether to allow the restoration of partial snapshots. Default is false.
|
||||
`rename_pattern` | If you want to rename indices as you restore them, use this option to specify a regular expression that matches all indices you want to restore. Use capture groups (`()`) to reuse portions of the index name.
|
||||
`rename_replacement` | If you want to rename indices as you restore them, use this option to specify the replacement pattern. Use `$0` to include the entire matching index name, `$1` to include the content of the first capture group, etc.
|
||||
`index_settings` | If you want to change index settings on restore, specify them here.
|
||||
`ignore_index_settings` | Rather than explicitly specifying new settings with `index_settings`, you can ignore certain index settings in the snapshot and use the cluster defaults on restore.
|
||||
|
||||
|
||||
### Conflicts and compatibility
|
||||
|
||||
One way to avoid naming conflicts when restoring indices is to use the `rename_pattern` and `rename_replacement` options. Then, if necessary, you can use the `_reindex` API to combine the two. The simpler way is to delete existing indices prior to restoring from a snapshot.
|
||||
|
||||
You can use the `_close` API to close existing indices prior to restoring from a snapshot, but the index in the snapshot has to have the same number of shards as the existing index.
|
||||
|
||||
We recommend ceasing write requests to a cluster before restoring from a snapshot, which helps avoid scenarios such as:
|
||||
|
||||
1. You delete an index, which also deletes its alias.
|
||||
1. A write request to the now-deleted alias creates a new index with the same name as the alias.
|
||||
1. The alias from the snapshot fails to restore due to a naming conflict with the new index.
|
||||
|
||||
Snapshots are only forward-compatible by one major version. If you have an old snapshot, you can sometimes restore it into an intermediate cluster, reindex all indices, take a new snapshot, and repeat until you arrive at your desired version, but you might find it easier to just manually index your data on the new cluster.
|
||||
|
||||
## Security plugin considerations
|
||||
|
||||
If you're using the security plugin, snapshots have some additional restrictions:
|
||||
|
||||
- To perform snapshot and restore operations, users must have the built-in `manage_snapshots` role.
|
||||
- You can't restore snapshots that contain global state or the `.opensearch_security` index.
|
||||
|
||||
If a snapshot contains global state, you must exclude it when performing the restore. If your snapshot also contains the `.opensearch_security` index, either exclude it or list all the other indices you want to include:
|
||||
|
||||
```json
|
||||
POST _snapshot/my-repository/3/_restore
|
||||
{
|
||||
"indices": "-.opensearch_security",
|
||||
"include_global_state": false
|
||||
}
|
||||
```
|
||||
|
||||
The `.opensearch_security` index contains sensitive data, so we recommend excluding it when you take a snapshot. If you do need to restore the index from a snapshot, you must include an admin certificate in the request:
|
||||
|
||||
```bash
|
||||
curl -k --cert ./kirk.pem --key ./kirk-key.pem -XPOST 'https://localhost:9200/_snapshot/my-repository/3/_restore?pretty'
|
||||
```
|
|
@ -0,0 +1,230 @@
|
|||
---
|
||||
layout: default
|
||||
title: Tasks API
|
||||
nav_order: 25
|
||||
---
|
||||
|
||||
# Tasks API operation
|
||||
|
||||
A task is any operation you run in a cluster. For example, searching your data collection of books for a title or author name is a task. When you run OpenSearch, a task is automatically created to monitor your cluster's health and performance. For more information about all of the tasks currently executing in your cluster, you can use the `tasks` API operation.
|
||||
|
||||
The following request returns information about all of your tasks:
|
||||
|
||||
```
|
||||
GET _tasks
|
||||
```
|
||||
|
||||
By including a task ID, you can get information specific to a particular task. Note that a task ID consists of a node's identifying string and the task's numerical ID. For example, if your node's identifying string is `nodestring` and the task's numerical ID is `1234`, then your task ID is `nodestring:1234`. You can find this information by running the `tasks` operation:
|
||||
|
||||
```
|
||||
GET _tasks/<task_id>
|
||||
```
|
||||
|
||||
Note that if a task finishes running, it won't be returned as part of your request. For an example of a task that takes a little longer to finish, you can run the [`_reindex`](../reindex-data) API operation on a larger document, and then run `tasks`.
|
||||
|
||||
**Sample Response**
|
||||
```json
|
||||
{
|
||||
"nodes": {
|
||||
"Mgqdm0r9SEGClWxp_RbnaQ": {
|
||||
"name": "opensearch-node1",
|
||||
"transport_address": "172.18.0.3:9300",
|
||||
"host": "172.18.0.3",
|
||||
"ip": "172.18.0.3:9300",
|
||||
"roles": [
|
||||
"data",
|
||||
"ingest",
|
||||
"master",
|
||||
"remote_cluster_client"
|
||||
],
|
||||
"tasks": {
|
||||
"Mgqdm0r9SEGClWxp_RbnaQ:17416": {
|
||||
"node": "Mgqdm0r9SEGClWxp_RbnaQ",
|
||||
"id": 17416,
|
||||
"type": "transport",
|
||||
"action": "cluster:monitor/tasks/lists",
|
||||
"start_time_in_millis": 1613599752458,
|
||||
"running_time_in_nanos": 994000,
|
||||
"cancellable": false,
|
||||
"headers": {}
|
||||
},
|
||||
"Mgqdm0r9SEGClWxp_RbnaQ:17413": {
|
||||
"node": "Mgqdm0r9SEGClWxp_RbnaQ",
|
||||
"id": 17413,
|
||||
"type": "transport",
|
||||
"action": "indices:data/write/bulk",
|
||||
"start_time_in_millis": 1613599752286,
|
||||
"running_time_in_nanos": 172846500,
|
||||
"cancellable": false,
|
||||
"parent_task_id": "Mgqdm0r9SEGClWxp_RbnaQ:17366",
|
||||
"headers": {}
|
||||
},
|
||||
"Mgqdm0r9SEGClWxp_RbnaQ:17366": {
|
||||
"node": "Mgqdm0r9SEGClWxp_RbnaQ",
|
||||
"id": 17366,
|
||||
"type": "transport",
|
||||
"action": "indices:data/write/reindex",
|
||||
"start_time_in_millis": 1613599750929,
|
||||
"running_time_in_nanos": 1529733100,
|
||||
"cancellable": true,
|
||||
"headers": {}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
You can also use the following parameters with your query.
|
||||
|
||||
Parameter | Data type | Description
|
||||
:--- | :--- | :---
|
||||
`nodes` | List | A comma-separated list of node IDs or names to limit the returned information. Use `_local` to return information from the node you're connecting to, specify the node name to get information from specific nodes, or keep the parameter empty to get information from all nodes.
|
||||
`actions` | List | A comma-separated list of actions that should be returned. Keep empty to return all.
|
||||
`detailed` | Boolean | Returns detailed task information. (Default: false)
|
||||
`parent_task_id` | String | Returns tasks with a specified parent task ID (node_id:task_number). Keep empty or set to -1 to return all.
|
||||
`wait_for_completion` | Boolean | Waits for the matching tasks to complete. (Default: false)
|
||||
`group_by` | Enum | Groups tasks by parent/child relationships or nodes. (Default: nodes)
|
||||
`timeout` | Time | An explicit operation timeout. (Default: 30 seconds)
|
||||
`master_timeout` | Time | The time to wait for a connection to the primary node. (Default: 30 seconds)
|
||||
|
||||
For example, this request returns tasks currently running on a node named `opensearch-node1`:
|
||||
|
||||
**Sample Request**
|
||||
|
||||
```
|
||||
GET /_tasks?nodes=opensearch-node1
|
||||
```
|
||||
|
||||
**Sample Response**
|
||||
|
||||
```json
|
||||
{
|
||||
"nodes": {
|
||||
"Mgqdm0r9SEGClWxp_RbnaQ": {
|
||||
"name": "opensearch-node1",
|
||||
"transport_address": "sample_address",
|
||||
"host": "sample_host",
|
||||
"ip": "sample_ip",
|
||||
"roles": [
|
||||
"data",
|
||||
"ingest",
|
||||
"master",
|
||||
"remote_cluster_client"
|
||||
],
|
||||
"tasks": {
|
||||
"Mgqdm0r9SEGClWxp_RbnaQ:24578": {
|
||||
"node": "Mgqdm0r9SEGClWxp_RbnaQ",
|
||||
"id": 24578,
|
||||
"type": "transport",
|
||||
"action": "cluster:monitor/tasks/lists",
|
||||
"start_time_in_millis": 1611612517044,
|
||||
"running_time_in_nanos": 638700,
|
||||
"cancellable": false,
|
||||
"headers": {}
|
||||
},
|
||||
"Mgqdm0r9SEGClWxp_RbnaQ:24579": {
|
||||
"node": "Mgqdm0r9SEGClWxp_RbnaQ",
|
||||
"id": 24579,
|
||||
"type": "direct",
|
||||
"action": "cluster:monitor/tasks/lists[n]",
|
||||
"start_time_in_millis": 1611612517044,
|
||||
"running_time_in_nanos": 222200,
|
||||
"cancellable": false,
|
||||
"parent_task_id": "Mgqdm0r9SEGClWxp_RbnaQ:24578",
|
||||
"headers": {}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Task canceling
|
||||
|
||||
After getting a list of tasks, you can cancel all cancelable tasks with the following request:
|
||||
|
||||
```
|
||||
POST _tasks/_cancel
|
||||
```
|
||||
|
||||
Note that not all tasks are cancelable. To see if a task is cancelable, refer to the `cancellable` field in the response to your `tasks` API request.
|
||||
|
||||
You can also cancel a task by including a specific task ID.
|
||||
|
||||
```
|
||||
POST _tasks/<task_id>/_cancel
|
||||
```
|
||||
|
||||
The `cancel` operation supports the same parameters as the `tasks` operation. The following example shows how to cancel all cancelable tasks on multiple nodes.
|
||||
|
||||
```
|
||||
POST _tasks/_cancel?nodes=opensearch-node1,opensearch-node2
|
||||
```
|
||||
|
||||
## Attaching headers to tasks
|
||||
|
||||
To associate requests with tasks for better tracking, you can provide an `X-Opaque-Id:<ID_number>` header as part of the HTTPS request headers of your `curl` command. The API will attach the specified header in the returned result.
|
||||
|
||||
Usage:
|
||||
|
||||
```bash
|
||||
curl -i -H "X-Opaque-Id: 111111" "https://localhost:9200/_tasks" -u 'admin:admin' --insecure
|
||||
```
|
||||
|
||||
The `_tasks` operation returns the following result.
|
||||
|
||||
```json
|
||||
HTTP/1.1 200 OK
|
||||
X-Opaque-Id: 111111
|
||||
content-type: application/json; charset=UTF-8
|
||||
content-length: 768
|
||||
|
||||
{
|
||||
"nodes": {
|
||||
"Mgqdm0r9SEGClWxp_RbnaQ": {
|
||||
"name": "opensearch-node1",
|
||||
"transport_address": "172.18.0.4:9300",
|
||||
"host": "172.18.0.4",
|
||||
"ip": "172.18.0.4:9300",
|
||||
"roles": [
|
||||
"data",
|
||||
"ingest",
|
||||
"master",
|
||||
"remote_cluster_client"
|
||||
],
|
||||
"tasks": {
|
||||
"Mgqdm0r9SEGClWxp_RbnaQ:30072": {
|
||||
"node": "Mgqdm0r9SEGClWxp_RbnaQ",
|
||||
"id": 30072,
|
||||
"type": "direct",
|
||||
"action": "cluster:monitor/tasks/lists[n]",
|
||||
"start_time_in_millis": 1613166701725,
|
||||
"running_time_in_nanos": 245400,
|
||||
"cancellable": false,
|
||||
"parent_task_id": "Mgqdm0r9SEGClWxp_RbnaQ:30071",
|
||||
"headers": {
|
||||
"X-Opaque-Id": "111111"
|
||||
}
|
||||
},
|
||||
"Mgqdm0r9SEGClWxp_RbnaQ:30071": {
|
||||
"node": "Mgqdm0r9SEGClWxp_RbnaQ",
|
||||
"id": 30071,
|
||||
"type": "transport",
|
||||
"action": "cluster:monitor/tasks/lists",
|
||||
"start_time_in_millis": 1613166701725,
|
||||
"running_time_in_nanos": 658200,
|
||||
"cancellable": false,
|
||||
"headers": {
|
||||
"X-Opaque-Id": "111111"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
This operation supports the same parameters as the `tasks` operation. The following example shows how you can associate `X-Opaque-Id` with specific tasks:
|
||||
|
||||
```bash
|
||||
curl -i -H "X-Opaque-Id: 123456" "https://localhost:9200/_tasks?nodes=opensearch-node1" -u 'admin:admin' --insecure
|
||||
```
|
|
@ -0,0 +1,18 @@
|
|||
---
|
||||
layout: default
|
||||
title: Supported units
|
||||
nav_order: 90
|
||||
---
|
||||
|
||||
# Supported units
|
||||
|
||||
OpenSearch supports the following units for all REST operations:
|
||||
|
||||
Unit | Description | Example
|
||||
:--- | :--- | :---
|
||||
Times | The supported units for time are `d` for days, `h` for hours, `m` for minutes, `s` for seconds, `ms` for milliseconds, `micros` for microseconds, and `nanos` for nanoseconds. | `5d` or `7h`
|
||||
Bytes | The supported units for byte size are `b` for bytes, `kb` for kibibytes, `mb` for mebibytes, `gb` for gibibytes, `tb` for tebibytes, and `pb` for pebibytes. Despite the base-10 abbreviations, these units are base-2; `1kb` is 1,024 bytes, `1mb` is 1,048,576 bytes, etc. | `7kb` or `6gb`
|
||||
Distances | The supported units for distance are `mi` for miles, `yd` for yards, `ft` for feet, `in` for inches, `km` for kilometers, `m` for meters, `cm` for centimeters, `mm` for millimeters, and `nmi` or `NM` for nautical miles. | `5mi` or `4ft`
|
||||
Quantities without units | For large values that don't have a unit, use `k` for kilo, `m` for mega, `g` for giga, `t` for tera, and `p` for peta. | `5k` for 5,000
|
||||
|
||||
To convert output units to human-readable values, see [Common REST parameters](../common-parameters/).
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,165 @@
|
|||
---
|
||||
layout: default
|
||||
title: Anomaly detection
|
||||
nav_order: 46
|
||||
has_children: true
|
||||
---
|
||||
|
||||
# Anomaly detection
|
||||
|
||||
An anomaly in OpenSearch is any unusual behavior change in your time-series data. Anomalies can provide valuable insights into your data. For example, for IT infrastructure data, an anomaly in the memory usage metric might help you uncover early signs of a system failure.
|
||||
|
||||
It can be challenging to discover anomalies using conventional methods such as creating visualizations and dashboards. You could configure an alert based on a static threshold, but this requires prior domain knowledge and isn't adaptive to data that exhibits organic growth or seasonal behavior.
|
||||
|
||||
Anomaly detection automatically detects anomalies in your OpenSearch data in near real-time using the Random Cut Forest (RCF) algorithm. RCF is an unsupervised machine learning algorithm that models a sketch of your incoming data stream to compute an `anomaly grade` and `confidence score` value for each incoming data point. These values are used to differentiate an anomaly from normal variations. For more information about how RCF works, see [Random Cut Forests](https://pdfs.semanticscholar.org/8bba/52e9797f2e2cc9a823dbd12514d02f29c8b9.pdf?_ga=2.56302955.1913766445.1574109076-1059151610.1574109076).
|
||||
|
||||
You can pair the anomaly detection plugin with the [alerting plugin](../alerting/) to notify you as soon as an anomaly is detected.
|
||||
|
||||
To use the anomaly detection plugin, your computer needs to have more than one CPU core.
|
||||
{: .note }
|
||||
|
||||
## Get started with Anomaly Detection
|
||||
|
||||
To get started, choose **Anomaly Detection** in OpenSearch Dashboards.
|
||||
To first test with sample streaming data, choose **Sample Detectors** and try out one of the preconfigured detectors.
|
||||
|
||||
### Step 1: Create a detector
|
||||
|
||||
A detector is an individual anomaly detection task. You can create multiple detectors, and all the detectors can run simultaneously, with each analyzing data from different sources.
|
||||
|
||||
1. Choose **Create Detector**.
|
||||
1. Enter a name and brief description. Make sure the name is unique and descriptive enough to help you to identify the purpose of the detector.
|
||||
1. For **Data source**, choose the index you want to use as the data source. You can optionally use index patterns to choose multiple indices.
|
||||
1. Select the **Timestamp field** in your index.
|
||||
1. (Optional) For **Data filter**, filter the index you chose as the data source. From the **Filter type** menu, choose **Visual filter**, and then design your filter query by selecting **Fields**, **Operator**, and **Value**, or choose **Custom Expression** and add your own JSON filter query.
|
||||
1. For **Detector operation settings**, define the **Detector interval**, which is the time interval at which the detector collects data.
|
||||
- The detector aggregates the data in this interval, then feeds the aggregated result into the anomaly detection model.
|
||||
The shorter you set this interval, the fewer data points the detector aggregates.
|
||||
The anomaly detection model uses a shingling process, a technique that uses consecutive data points to create a sample for the model. This process needs a certain number of aggregated data points from contiguous intervals.
|
||||
- We recommend setting the detector interval based on your actual data. If it's too long, it might delay the results. If it's too short, it might miss some data and won't have a sufficient number of consecutive data points for the shingle process.
|
||||
1. (Optional) To add extra processing time for data collection, specify a **Window delay** value. This value tells the detector that the data is not ingested into OpenSearch in real time but with a certain delay.
|
||||
Set the window delay to shift the detector interval to account for this delay.
|
||||
- For example, say the detector interval is 10 minutes and data is ingested into your cluster with a general delay of 1 minute.
|
||||
Assume the detector runs at 2:00. The detector attempts to get the last 10 minutes of data from 1:50 to 2:00, but because of the 1-minute delay, it only gets 9 minutes of data and misses the data from 1:59 to 2:00.
|
||||
Setting the window delay to 1 minute shifts the interval window to 1:49 - 1:59, so the detector accounts for all 10 minutes of the detector interval time.
|
||||
1. Choose **Create**.
|
||||
|
||||
After you create the detector, the next step is to add features to it.
|
||||
|
||||
### Step 2: Add features to your detector
|
||||
|
||||
A feature is the field in your index that you want to check for anomalies. A detector can discover anomalies across one or more features. You must choose an aggregation method for each feature: `average()`, `count()`, `sum()`, `min()`, or `max()`. The aggregation method determines what constitutes an anomaly.
|
||||
|
||||
For example, if you choose `min()`, the detector focuses on finding anomalies based on the minimum values of your feature. If you choose `average()`, the detector finds anomalies based on the average values of your feature.
|
||||
|
||||
A multi-feature model correlates anomalies across all its features. The [curse of dimensionality](https://en.wikipedia.org/wiki/Curse_of_dimensionality) makes it less likely for multi-feature models to identify smaller anomalies as compared to a single-feature model. Adding more features might negatively impact the [precision and recall](https://en.wikipedia.org/wiki/Precision_and_recall) of a model. A higher proportion of noise in your data might further amplify this negative impact. Selecting the optimal feature set is usually an iterative process. We recommend experimenting with a historical detector with different feature sets and checking the precision before moving on to real-time detectors. By default, the maximum number of features for a detector is 5. You can adjust this limit with the `plugins.anomaly_detection.max_anomaly_features` setting.
|
||||
{: .note }
|
||||
|
||||
1. On the **Model configuration** page, enter the **Feature name**.
|
||||
1. For **Find anomalies based on**, choose the method to find anomalies. For the **Field Value** menu, choose the **field** and the **aggregation method**. Or choose **Custom expression**, and add your own JSON aggregation query.
|
||||
|
||||
#### (Optional) Set a category field for high cardinality
|
||||
|
||||
You can categorize anomalies based on a keyword or IP field type.
|
||||
|
||||
The category field categorizes or slices the source time series with a dimension like IP addresses, product IDs, country codes, and so on. This helps to see a granular view of anomalies within each entity of the category field to isolate and debug issues.
|
||||
|
||||
To set a category field, choose **Enable a category field** and select a field.
|
||||
|
||||
Only a certain number of unique entities are supported in the category field. Use the following equation to calculate the recommended total number of entities supported in a cluster:
|
||||
|
||||
```
|
||||
(data nodes * heap size * anomaly detection maximum memory percentage) / (entity size of a detector)
|
||||
```
|
||||
|
||||
This formula provides a good starting point, but make sure to test with a representative workload.
|
||||
{: .note }
|
||||
|
||||
For example, for a cluster with 3 data nodes, each with 8G of JVM heap size, a maximum memory percentage of 10% (default), and the entity size of the detector as 1MB: the total number of unique entities supported is (8.096 * 10^9 * 0.1 / 1M ) * 3 = 2429.
|
||||
|
||||
#### Set a window size
|
||||
|
||||
Set the number of aggregation intervals from your data stream to consider in a detection window. It's best to choose this value based on your actual data to see which one leads to the best results for your use case.
|
||||
|
||||
Based on experiments performed on a wide variety of one-dimensional data streams, we recommend using a window size between 1 and 16. The default window size is 8. If you set the category field for high cardinality, the default window size is 1.
|
||||
|
||||
If you expect missing values in your data or if you want to base the anomalies on the current interval, choose 1. If your data is continuously ingested and you want to base the anomalies on multiple intervals, choose a larger window size.
|
||||
|
||||
#### Preview sample anomalies
|
||||
|
||||
Preview sample anomalies and adjust the feature settings if needed.
|
||||
For sample previews, the anomaly detection plugin selects a small number of data samples---for example, one data point every 30 minutes---and uses interpolation to estimate the remaining data points to approximate the actual feature data. It loads this sample dataset into the detector. The detector uses this sample dataset to generate a sample preview of anomaly results.
|
||||
|
||||
Examine the sample preview and use it to fine-tune your feature configurations (for example, enable or disable features) to get more accurate results.
|
||||
|
||||
1. Choose **Save and start detector**.
|
||||
1. Choose between automatically starting the detector (recommended) or manually starting the detector at a later time.
|
||||
|
||||
### Step 3: Observe the results
|
||||
|
||||
Choose the **Anomaly results** tab. You need to wait for some time to see the anomaly results. If the detector interval is 10 minutes, the detector might take more than an hour to start, as it's waiting for sufficient data to generate anomalies.
|
||||
|
||||
A shorter interval means the model passes the shingle process more quickly and starts to generate the anomaly results sooner.
|
||||
Use the [profile detector](./api#profile-detector) operation to make sure you have sufficient data points.
|
||||
|
||||
If you see the detector pending in "initialization" for longer than a day, aggregate your existing data using the detector interval to check for any missing data points. If you find a lot of missing data points from the aggregated data, consider increasing the detector interval.
|
||||
|
||||
![Anomaly detection results](../images/ad.png)
|
||||
|
||||
Analyze anomalies with the following visualizations:
|
||||
|
||||
- **Live anomalies** - displays live anomaly results for the last 60 intervals. For example, if the interval is 10 minutes, it shows results for the last 600 minutes. The chart refreshes every 30 seconds.
|
||||
- **Anomaly history** - plots the anomaly grade with the corresponding measure of confidence.
|
||||
- **Feature breakdown** - plots the features based on the aggregation method. You can vary the date-time range of the detector.
|
||||
- **Anomaly occurrence** - shows the `Start time`, `End time`, `Data confidence`, and `Anomaly grade` for each detected anomaly.
|
||||
|
||||
`Anomaly grade` is a number between 0 and 1 that indicates how anomalous a data point is. An anomaly grade of 0 represents “not an anomaly,” and a non-zero value represents the relative severity of the anomaly.
|
||||
|
||||
`Data confidence` is an estimate of the probability that the reported anomaly grade matches the expected anomaly grade. Confidence increases as the model observes more data and learns the data behavior and trends. Note that confidence is distinct from model accuracy.
|
||||
|
||||
If you set the category field, you see an additional **Heat map** chart. The heat map correlates results for anomalous entities. This chart is empty until you select an anomalous entity. You also see the anomaly and feature line chart for the time period of the anomaly (`anomaly_grade` > 0).
|
||||
|
||||
Choose a filled rectangle to see a more detailed view of the anomaly.
|
||||
{: .note }
|
||||
|
||||
### Step 4: Set up alerts
|
||||
|
||||
Choose **Set up alerts** and configure a monitor to notify you when anomalies are detected. For steps to create a monitor and set up notifications based on your anomaly detector, see [Monitors](../alerting/monitors/).
|
||||
|
||||
If you stop or delete a detector, make sure to delete any monitors associated with it.
|
||||
|
||||
### Step 5: Adjust the model
|
||||
|
||||
To see all the configuration settings for a detector, choose the **Detector configuration** tab.
|
||||
|
||||
1. To make any changes to the detector configuration, or fine-tune the time interval to minimize any false positives, go to the **Detector configuration** section and choose **Edit**.
|
||||
- You need to stop the detector to change its configuration. Confirm that you want to stop the detector and proceed.
|
||||
1. To enable or disable features, in the **Features** section, choose **Edit** and adjust the feature settings as needed. After you make your changes, choose **Save and start detector**.
|
||||
- Choose between automatically starting the detector (recommended) or manually starting the detector at a later time.
|
||||
|
||||
### Step 6: Analyze historical data
|
||||
|
||||
Analyzing historical data helps you get familiar with the anomaly detection plugin. You can also evaluate the performance of a detector with historical data to further fine-tune it.
|
||||
|
||||
To use a historical detector, you need to specify a date range that has data present in at least 1,000 detection intervals.
|
||||
{: .note }
|
||||
|
||||
1. Choose **Historical detectors** and **Create historical detector**.
|
||||
1. Enter the **Name** of the detector and a brief **Description**.
|
||||
1. For **Data source**, choose the index to use as the data source. You can optionally use index patterns to choose multiple indices.
|
||||
1. For **Time range**, select a time range for historical analysis.
|
||||
1. For **Detector settings**, choose to use the settings of an existing detector. Or choose the **Timestamp field** in your index, add individual features to the detector, and set the detector interval.
|
||||
1. (Optional) Choose to run the historical detector automatically after creating it.
|
||||
1. Choose **Create**.
|
||||
- You can stop the historical detector even before it completes.
|
||||
|
||||
### Step 7: Manage your detectors
|
||||
|
||||
To change or delete a detector, go to the **Detector details** page.
|
||||
|
||||
1. To make changes to your detector, choose the detector name.
|
||||
1. Choose **Actions** and **Edit detector**.
|
||||
- You need to stop the detector to change its configuration. Confirm that you want to stop the detector and proceed.
|
||||
1. Make your changes and choose **Save changes**.
|
||||
|
||||
To delete your detector, choose **Actions** and **Delete detector**. In the pop-up box, type `delete` to confirm and choose **Delete**.
|
|
@ -0,0 +1,86 @@
|
|||
---
|
||||
layout: default
|
||||
title: Anomaly detection security
|
||||
nav_order: 10
|
||||
parent: Anomaly detection
|
||||
has_children: false
|
||||
---
|
||||
|
||||
# Anomaly detection security
|
||||
|
||||
You can use the security plugin with anomaly detection in OpenSearch to limit non-admin users to specific actions. For example, you might want some users to only be able to create, update, or delete detectors, while others can only view detectors.
|
||||
|
||||
All anomaly detection indices are protected as system indices. Only a super admin user or an admin user with a TLS certificate can access system indices. For more information, see [System indices](../../security/configuration/system-indices/).
|
||||
|
||||
|
||||
Security for anomaly detection works the same as [security for alerting](../../alerting/security/).
|
||||
|
||||
## Basic permissions
|
||||
|
||||
As an admin user, you can use the security plugin to assign specific permissions to users based on which APIs they need access to. For a list of supported APIs, see [Anomaly detection API](../api/).
|
||||
|
||||
The security plugin has two built-in roles that cover most anomaly detection use cases: `anomaly_full_access` and `anomaly_read_access`. For descriptions of each, see [Predefined roles](../../security/access-control/users-roles/#predefined-roles).
|
||||
|
||||
If these roles don't meet your needs, mix and match individual anomaly detection [permissions](../../security/access-control/permissions/) to suit your use case. Each action corresponds to an operation in the REST API. For example, the `cluster:admin/opensearch/ad/detector/delete` permission lets you delete detectors.
|
||||
|
||||
## (Advanced) Limit access by backend role
|
||||
|
||||
Use backend roles to configure fine-grained access to individual detectors based on roles. For example, users of different departments in an organization can view detectors owned by their own department.
|
||||
|
||||
First, make sure your users have the appropriate [backend roles](../../security/access-control/). Backend roles usually come from an [LDAP server](../../security/configuration/ldap/) or [SAML provider](../../security/configuration/saml/), but if you use the internal user database, you can use the REST API to [add them manually](../../security/access-control/api/#create-user).
|
||||
|
||||
Next, enable the following setting:
|
||||
|
||||
```json
|
||||
PUT _cluster/settings
|
||||
{
|
||||
"transient": {
|
||||
"plugins.anomaly_detection.filter_by_backend_roles": "true"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Now when users view anomaly detection resources in OpenSearch Dashboards (or make REST API calls), they only see detectors created by users who share at least one backend role.
|
||||
For example, consider two users: `alice` and `bob`.
|
||||
|
||||
`alice` has an analyst backend role:
|
||||
|
||||
```json
|
||||
PUT _plugins/_security/api/internalusers/alice
|
||||
{
|
||||
"password": "alice",
|
||||
"backend_roles": [
|
||||
"analyst"
|
||||
],
|
||||
"attributes": {}
|
||||
}
|
||||
```
|
||||
|
||||
`bob` has a human-resources backend role:
|
||||
|
||||
```json
|
||||
PUT _plugins/_security/api/internalusers/bob
|
||||
{
|
||||
"password": "bob",
|
||||
"backend_roles": [
|
||||
"human-resources"
|
||||
],
|
||||
"attributes": {}
|
||||
}
|
||||
```
|
||||
|
||||
Both `alice` and `bob` have full access to anomaly detection:
|
||||
|
||||
```json
|
||||
PUT _plugins/_security/api/rolesmapping/anomaly_full_access
|
||||
{
|
||||
"backend_roles": [],
|
||||
"hosts": [],
|
||||
"users": [
|
||||
"alice",
|
||||
"bob"
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
Because they have different backend roles, `alice` and `bob` cannot view each other's detectors or their results.
|
|
@ -0,0 +1,42 @@
|
|||
---
|
||||
layout: default
|
||||
title: Settings
|
||||
parent: Anomaly detection
|
||||
nav_order: 4
|
||||
---
|
||||
|
||||
# Settings
|
||||
|
||||
The anomaly detection plugin adds several settings to the standard OpenSearch cluster settings.
|
||||
The settings are dynamic, so you can change the default behavior of the plugin without restarting your cluster.
|
||||
You can mark settings as `persistent` or `transient`.
|
||||
|
||||
For example, to update the retention period of the result index:
|
||||
|
||||
```json
|
||||
PUT _cluster/settings
|
||||
{
|
||||
"transient": {
|
||||
"plugins.anomaly_detection.ad_result_history_retention_period": "5m"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Setting | Default | Description
|
||||
:--- | :--- | :---
|
||||
`plugins.anomaly_detection.enabled` | True | Whether the anomaly detection plugin is enabled or not. If disabled, all detectors immediately stop running.
|
||||
`plugins.anomaly_detection.max_anomaly_detectors` | 1,000 | The maximum number of non-high cardinality detectors (no category field) users can create.
|
||||
`plugins.anomaly_detection.max_multi_entity_anomaly_detectors` | 10 | The maximum number of high cardinality detectors (with category field) in a cluster.
|
||||
`plugins.anomaly_detection.max_anomaly_features` | 5 | The maximum number of features for a detector.
|
||||
`plugins.anomaly_detection.ad_result_history_rollover_period` | 12h | How often the rollover condition is checked. If `true`, the plugin rolls over the result index to a new index.
|
||||
`plugins.anomaly_detection.ad_result_history_max_docs` | 250000000 | The maximum number of documents in one result index. The plugin only counts refreshed documents in the primary shards.
|
||||
`plugins.anomaly_detection.ad_result_history_retention_period` | 30d | The maximum age of the result index. If its age exceeds the threshold, the plugin deletes the rolled over result index. If the cluster has only one result index, the plugin keeps the index even if it's older than its configured retention period.
|
||||
`plugins.anomaly_detection.max_entities_per_query` | 1,000 | The maximum unique values per detection interval for high cardinality detectors. By default, if the category field has more than 1,000 unique values in a detector interval, the plugin selects the top 1,000 values and orders them by `doc_count`.
|
||||
`plugins.anomaly_detection.max_entities_for_preview` | 30 | The maximum unique category field values displayed with the preview operation for high cardinality detectors. If the category field has more than 30 unique values, the plugin selects the top 30 values and orders them by `doc_count`.
|
||||
`plugins.anomaly_detection.max_primary_shards` | 10 | The maximum number of primary shards an anomaly detection index can have.
|
||||
`plugins.anomaly_detection.filter_by_backend_roles` | False | When you enable the security plugin and set this to `true`, the plugin filters results based on the user's backend role(s).
|
||||
`plugins.anomaly_detection.max_cache_miss_handling_per_second` | 100 | High cardinality detectors use a cache to store active models. In the event of a cache miss, the cache gets the models from the model checkpoint index. Use this setting to limit the rate of fetching models. Because the thread pool for a GET operation has a queue of 1,000, we recommend setting this value below 1,000.
|
||||
`plugins.anomaly_detection.max_batch_task_per_node` | 2 | Starting a historical detector triggers a batch task. This setting is the number of batch tasks that you can run per data node. You can tune this setting from 1 to 1000. If the data nodes can't support all batch tasks and you're not sure if the data nodes are capable of running more historical detectors, add more data nodes instead of changing this setting to a higher value.
|
||||
`plugins.anomaly_detection.max_old_ad_task_docs_per_detector` | 10 | You can run the same historical detector many times. For each run, the anomaly detection plugin creates a new task. This setting is the number of previous tasks the plugin keeps. Set this value to at least 1 to track its last run. You can keep a maximum of 1,000 old tasks to avoid overwhelming the cluster.
|
||||
`plugins.anomaly_detection.batch_task_piece_size` | 1000 | The date range for a historical task is split into smaller pieces and the anomaly detection plugin runs the task piece by piece. Each piece contains 1,000 detection intervals by default. For example, if detector interval is 1 minute and one piece is 1000 minutes, the feature data is queried every 1,000 minutes. You can change this setting from 1 to 10,000.
|
||||
`plugins.anomaly_detection.batch_task_piece_interval_seconds` | 5 | Add a time interval between historical detector tasks. This interval prevents the task from consuming too much of the available resources and starving other operations like search and bulk index. You can change this setting from 1 to 600 seconds.
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,64 @@
|
|||
---
|
||||
layout: default
|
||||
title: Cron
|
||||
nav_order: 20
|
||||
parent: Alerting
|
||||
has_children: false
|
||||
---
|
||||
|
||||
# Cron expression reference
|
||||
|
||||
Monitors can run at a variety of fixed intervals (e.g. hourly or daily), but you can also define custom cron expressions for when they should run. Monitors use the Unix cron syntax and support five fields:
|
||||
|
||||
Field | Valid values
|
||||
:--- | :---
|
||||
Minute | 0-59
|
||||
Hour | 0-23
|
||||
Day of month | 1-31
|
||||
Month | 1-12
|
||||
Day of week | 0-7 (0 and 7 are both Sunday) or SUN, MON, TUE, WED, THU, FRI, SAT
|
||||
|
||||
For example, the following expression translates to "every Monday through Friday at 11:30 AM":
|
||||
|
||||
```
|
||||
30 11 * * 1-5
|
||||
```
|
||||
|
||||
|
||||
## Features
|
||||
|
||||
Feature | Description
|
||||
:--- | :---
|
||||
`*` | Wildcard. Specifies all valid values.
|
||||
`,` | List. Use to specify several values (e.g. `1,15,30`).
|
||||
`-` | Range. Use to specify a range of values (e.g. `1-15`).
|
||||
`/` | Step. Use after a wildcard or range to specify the "step" between values. For example, `0-11/2` is equivalent to `0,2,4,6,8,10`.
|
||||
|
||||
Note that you can specify the day using two fields: day of month and day of week. For most situations, we recommend that you use just one of these fields and leave the other as `*`.
|
||||
|
||||
If you use a non-wildcard value in both fields, the monitor runs when either field matches the time. For example, `15 2 1,15 * 1` causes the monitor to run at 2:15 AM on the 1st of the month, the 15th of the month, and every Monday.
|
||||
|
||||
|
||||
## Sample expressions
|
||||
|
||||
Every other day at 1:45 PM:
|
||||
|
||||
```
|
||||
45 13 1-31/2 * *
|
||||
```
|
||||
|
||||
Every 10 minutes on Saturday and Sunday:
|
||||
|
||||
```
|
||||
0/10 * * * 6-7
|
||||
```
|
||||
|
||||
Every three hours on the first day of every other month:
|
||||
|
||||
```
|
||||
0 0-23/3 1 1-12/2 *
|
||||
```
|
||||
|
||||
## API
|
||||
|
||||
For an example of how to use a custom cron expression in an API call, see the [create monitor API operation](../api/#request-1).
|
|
@ -0,0 +1,16 @@
|
|||
---
|
||||
layout: default
|
||||
title: Alerting
|
||||
nav_order: 34
|
||||
has_children: true
|
||||
---
|
||||
|
||||
# Alerting
|
||||
OpenSearch Dashboards
|
||||
{: .label .label-yellow :}
|
||||
|
||||
The alerting feature notifies you when data from one or more OpenSearch indices meets certain conditions. For example, you might want to notify a [Slack](https://slack.com/) channel if your application logs more than five HTTP 503 errors in one hour, or you might want to page a developer if no new documents have been indexed in the past 20 minutes.
|
||||
|
||||
To get started, choose **Alerting** in OpenSearch Dashboards.
|
||||
|
||||
![OpenSearch Dashboards side bar with link](../images/alerting.png)
|
|
@ -0,0 +1,373 @@
|
|||
---
|
||||
layout: default
|
||||
title: Monitors
|
||||
nav_order: 1
|
||||
parent: Alerting
|
||||
has_children: false
|
||||
---
|
||||
|
||||
# Monitors
|
||||
|
||||
#### Table of contents
|
||||
- TOC
|
||||
{:toc}
|
||||
|
||||
|
||||
---
|
||||
|
||||
## Key terms
|
||||
|
||||
Term | Definition
|
||||
:--- | :---
|
||||
Monitor | A job that runs on a defined schedule and queries OpenSearch. The results of these queries are then used as input for one or more *triggers*.
|
||||
Trigger | Conditions that, if met, generate *alerts*.
|
||||
Alert | An event associated with a trigger. When an alert is created, the trigger performs *actions*, which can include sending a notification.
|
||||
Action | The information that you want the monitor to send out after being triggered. Actions have a *destination*, a message subject, and a message body.
|
||||
Destination | A reusable location for an action, such as Amazon Chime, Slack, or a webhook URL.
|
||||
|
||||
|
||||
---
|
||||
|
||||
## Create destinations
|
||||
|
||||
1. Choose **Alerting**, **Destinations**, **Add destination**.
|
||||
1. Specify a name for the destination so that you can identify it later.
|
||||
1. For **Type**, choose Slack, Amazon Chime, custom webhook, or [email](#email-as-a-destination).
|
||||
|
||||
For Email type, refer to the [Email as a destination](#email-as-a-destination) section below. For all other types, specify the webhook URL. For more information about webhooks, see the documentation for [Slack](https://api.slack.com/incoming-webhooks) and [Chime](https://docs.aws.amazon.com/chime/latest/ug/webhooks.html).
|
||||
|
||||
For custom webhooks, you must specify more information: parameters and headers. For example, if your endpoint requires basic authentication, you might need to add a header with a key of `Authorization` and a value of `Basic <Base64-encoded-credential-string>`. You might also need to change `Content-Type` to whatever your webhook requires. Popular values are `application/json`, `application/xml`, and `text/plain`.
|
||||
|
||||
This information is stored in plain text in the OpenSearch cluster. We will improve this design in the future, but for now, the encoded credentials (which are neither encrypted nor hashed) might be visible to other OpenSearch users.
|
||||
|
||||
|
||||
### Email as a destination
|
||||
|
||||
To send or receive an alert notification as an email, choose **Email** as the destination type. Next, add at least one sender and recipient. We recommend adding email groups if you want to notify more than a few people of an alert. You can configure senders and recipients using **Manage senders** and **Manage email groups**.
|
||||
|
||||
|
||||
#### Manage senders
|
||||
|
||||
Senders are email accounts from which the alerting plugin sends notifications.
|
||||
|
||||
To configure a sender email, do the following:
|
||||
|
||||
1. After you choose **Email** as the destination type, choose **Manage senders**.
|
||||
1. Choose **Add sender**, **New sender** and enter a unique name.
|
||||
1. Enter the email address, SMTP host (e.g. `smtp.gmail.com` for a Gmail account), and the port.
|
||||
1. Choose an encryption method, or use the default value of **None**. However, most email providers require SSL or TLS, which requires a username and password in OpenSearch keystore. Refer to [Authenticate sender account](#authenticate-sender-account) to learn more.
|
||||
1. Choose **Save** to save the configuration and create the sender. You can create a sender even before you add your credentials to the OpenSearch keystore. However, you must [authenticate each sender account](#authenticate-sender-account) before you use the destination to send your alert.
|
||||
|
||||
You can reuse senders across many different destinations, but each destination only supports one sender.
|
||||
|
||||
|
||||
#### Manage email groups or recipients
|
||||
|
||||
Use email groups to create and manage reusable lists of email addresses. For example, one alert might email the DevOps team, whereas another might email the executive team and the engineering team.
|
||||
|
||||
You can enter individual email addresses or an email group in the **Recipients** field.
|
||||
|
||||
1. After you choose **Email** as the destination type, choose **Manage email groups**. Then choose **Add email group**, **New email group**.
|
||||
1. Enter a unique name.
|
||||
1. For recipient emails, enter any number of email addresses.
|
||||
1. Choose **Save**.
|
||||
|
||||
|
||||
#### Authenticate sender account
|
||||
|
||||
If your email provider requires SSL or TLS, you must authenticate each sender account before you can send an email. Enter these credentials in the OpenSearch keystore using the CLI. Run the following commands (in your OpenSearch directory) to enter your username and password. The `<sender_name>` is the name you entered for **Sender** earlier.
|
||||
|
||||
```bash
|
||||
./bin/opensearch-keystore add opendistro.alerting.destination.email.<sender_name>.username
|
||||
./bin/opensearch-keystore add opendistro.alerting.destination.email.<sender_name>.password
|
||||
```
|
||||
|
||||
**Note**: Keystore settings are node-specific. You must run these commands on each node.
|
||||
{: .note}
|
||||
|
||||
To change or update your credentials (after you've added them to the keystore on every node), call the reload API to automatically update those credentials without restarting OpenSearch:
|
||||
|
||||
```json
|
||||
POST _nodes/reload_secure_settings
|
||||
{
|
||||
"secure_settings_password": "1234"
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
---
|
||||
|
||||
## Create monitors
|
||||
|
||||
1. Choose **Alerting**, **Monitors**, **Create monitor**.
|
||||
1. Specify a name for the monitor.
|
||||
|
||||
The anomaly detection option is for pairing with the anomaly detection plugin. See [Anomaly Detection](../../ad/).
|
||||
For anomaly detector, choose an appropriate schedule for the monitor based on the detector interval. Otherwise, the alerting monitor might miss reading the results.
|
||||
|
||||
For example, assume you set the monitor interval and the detector interval as 5 minutes, and you start the detector at 12:00. If an anomaly is detected at 12:05, it might be available at 12:06 because of the delay between writing the anomaly and it being available for queries. The monitor reads the anomaly results between 12:00 and 12:05, so it does not get the anomaly results available at 12:06.
|
||||
|
||||
To avoid this issue, make sure the alerting monitor interval is at least twice the detector interval.
|
||||
When you create a monitor using OpenSearch Dashboards, the anomaly detector plugin generates a default monitor schedule that's twice the detector interval.
|
||||
|
||||
Whenever you update a detector’s interval, make sure to update the associated monitor interval as well, as the anomaly detection plugin does not do this automatically.
|
||||
|
||||
1. Choose one or more indices. You can also use `*` as a wildcard to specify an index pattern.
|
||||
|
||||
If you use the security plugin, you can only choose indices that you have permission to access. For details, see [Alerting security](../security/).
|
||||
|
||||
1. Define the monitor in one of three ways: visually, using a query, or using an anomaly detector.
|
||||
|
||||
- Visual definition works well for monitors that you can define as "some value is above or below some threshold for some amount of time."
|
||||
|
||||
- Query definition gives you flexibility in terms of what you query for (using [the OpenSearch query DSL](../../opensearch/full-text)) and how you evaluate the results of that query (Painless scripting).
|
||||
|
||||
This example averages the `cpu_usage` field:
|
||||
|
||||
```json
|
||||
{
|
||||
"size": 0,
|
||||
"query": {
|
||||
"match_all": {}
|
||||
},
|
||||
"aggs": {
|
||||
"avg_cpu": {
|
||||
"avg": {
|
||||
"field": "cpu_usage"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
You can even filter query results using `{% raw %}{{period_start}}{% endraw %}` and `{% raw %}{{period_end}}{% endraw %}`:
|
||||
|
||||
```json
|
||||
{
|
||||
"size": 0,
|
||||
"query": {
|
||||
"bool": {
|
||||
"filter": [{
|
||||
"range": {
|
||||
"timestamp": {
|
||||
"from": "{% raw %}{{period_end}}{% endraw %}||-1h",
|
||||
"to": "{% raw %}{{period_end}}{% endraw %}",
|
||||
"include_lower": true,
|
||||
"include_upper": true,
|
||||
"format": "epoch_millis",
|
||||
"boost": 1
|
||||
}
|
||||
}
|
||||
}],
|
||||
"adjust_pure_negative": true,
|
||||
"boost": 1
|
||||
}
|
||||
},
|
||||
"aggregations": {}
|
||||
}
|
||||
```
|
||||
|
||||
"Start" and "end" refer to the interval at which the monitor runs. See [Available variables](#available-variables).
|
||||
|
||||
|
||||
1. To define a monitor visually, choose **Define using visual graph**. Then choose an aggregation (for example, `count()` or `average()`), a set of documents, and a timeframe. Visual definition works well for most monitors.
|
||||
|
||||
To use a query, choose **Define using extraction query**, add your query (using [the OpenSearch query DSL](../../opensearch/full-text/)), and test it using the **Run** button.
|
||||
|
||||
The monitor makes this query to OpenSearch as often as the schedule dictates; check the **Query Performance** section and make sure you're comfortable with the performance implications.
|
||||
|
||||
To use an anomaly detector, choose **Define using Anomaly detector** and select your **Detector**.
|
||||
1. Choose a frequency and timezone for your monitor. Note that you can only pick a timezone if you choose Daily, Weekly, Monthly, or [custom cron expression](../cron/) for frequency.
|
||||
1. Choose **Create**.
|
||||
|
||||
|
||||
---
|
||||
|
||||
## Create triggers
|
||||
|
||||
The next step in creating a monitor is to create a trigger. These steps differ depending on whether you chose **Define using visual graph** or **Define using extraction query** or **Define using Anomaly detector** when you created the monitor.
|
||||
|
||||
Either way, you begin by specifying a name and severity level for the trigger. Severity levels help you manage alerts. A trigger with a high severity level (e.g. 1) might page a specific individual, whereas a trigger with a low severity level might message a chat room.
|
||||
|
||||
|
||||
### Visual graph
|
||||
|
||||
For **Trigger condition**, specify a threshold for the aggregation and timeframe you chose earlier, such as "is below 1,000" or "is exactly 10."
|
||||
|
||||
The line moves up and down as you increase and decrease the threshold. Once this line is crossed, the trigger evaluates to true.
|
||||
|
||||
|
||||
### Extraction query
|
||||
|
||||
For **Trigger condition**, specify a Painless script that returns true or false. Painless is the default OpenSearch scripting language and has a syntax similar to Groovy.
|
||||
|
||||
Trigger condition scripts revolve around the `ctx.results[0]` variable, which corresponds to the extraction query response. For example, your script might reference `ctx.results[0].hits.total.value` or `ctx.results[0].hits.hits[i]._source.error_code`.
|
||||
|
||||
A return value of true means the trigger condition has been met, and the trigger should execute its actions. Test your script using the **Run** button.
|
||||
|
||||
The **Info** link next to **Trigger condition** contains a useful summary of the variables and results available to your query.
|
||||
{: .tip }
|
||||
|
||||
|
||||
### Anomaly detector
|
||||
|
||||
For **Trigger type**, choose **Anomaly detector grade and confidence**.
|
||||
|
||||
Specify the **Anomaly grade condition** for the aggregation and timeframe you chose earlier, such as "IS ABOVE 0.7" or "IS EXACTLY 0.5." The *anomaly grade* is a number between 0 and 1 that indicates the level of severity of how anomalous a data point is.
|
||||
|
||||
Specify the **Anomaly confidence condition** for the aggregation and timeframe you chose earlier, such as "IS ABOVE 0.7" or "IS EXACTLY 0.5." The *anomaly confidence* is an estimate of the probability that the reported anomaly grade matches the expected anomaly grade.
|
||||
|
||||
The line moves up and down as you increase and decrease the threshold. Once this line is crossed, the trigger evaluates to true.
|
||||
|
||||
|
||||
#### Sample scripts
|
||||
|
||||
{::comment}
|
||||
These scripts are Painless, not Groovy, but calling them Groovy in Jekyll gets us syntax highlighting in the generated HTML.
|
||||
{:/comment}
|
||||
|
||||
```groovy
|
||||
// Evaluates to true if the query returned any documents
|
||||
ctx.results[0].hits.total.value > 0
|
||||
```
|
||||
|
||||
```groovy
|
||||
// Returns true if the avg_cpu aggregation exceeds 90
|
||||
if (ctx.results[0].aggregations.avg_cpu.value > 90) {
|
||||
return true;
|
||||
}
|
||||
```
|
||||
|
||||
```groovy
|
||||
// Performs some crude custom scoring and returns true if that score exceeds a certain value
|
||||
int score = 0;
|
||||
for (int i = 0; i < ctx.results[0].hits.hits.length; i++) {
|
||||
// Weighs 500 errors 10 times as heavily as 503 errors
|
||||
if (ctx.results[0].hits.hits[i]._source.http_status_code == "500") {
|
||||
score += 10;
|
||||
} else if (ctx.results[0].hits.hits[i]._source.http_status_code == "503") {
|
||||
score += 1;
|
||||
}
|
||||
}
|
||||
if (score > 99) {
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
```
|
||||
|
||||
Below are some variables you can include in your message using Mustache templates to see more information about your monitors.
|
||||
|
||||
### Available variables
|
||||
|
||||
#### Monitor variables
|
||||
|
||||
Variable | Data Type | Description
|
||||
:--- | :--- | :---
|
||||
`ctx.monitor` | JSON | Includes `ctx.monitor.name`, `ctx.monitor.type`, `ctx.monitor.enabled`, `ctx.monitor.enabled_time`, `ctx.monitor.schedule`, `ctx.monitor.inputs`, `triggers` and `ctx.monitor.last_update_time`.
|
||||
`ctx.monitor.user` | JSON | Includes information about the user who created the monitor. Includes `ctx.monitor.user.backend_roles` and `ctx.monitor.user.roles`, which are arrays that contain the backend roles and roles assigned to the user. See [alerting security](../security/) for more information.
|
||||
`ctx.monitor.enabled` | Boolean | Whether the monitor is enabled.
|
||||
`ctx.monitor.enabled_time` | Milliseconds | Unix epoch time of when the monitor was last enabled.
|
||||
`ctx.monitor.schedule` | JSON | Contains a schedule of how often or when the monitor should run.
|
||||
`ctx.monitor.schedule.period.interval` | Integer | The interval at which the monitor runs.
|
||||
`ctx.monitor.schedule.period.unit` | String | The interval's unit of time.
|
||||
`ctx.monitor.inputs` | Array | An array that contains the indices and definition used to create the monitor.
|
||||
`ctx.monitor.inputs.search.indices` | Array | An array that contains the indices the monitor observes.
|
||||
`ctx.monitor.inputs.search.query` | N/A | The definition used to define the monitor.
|
||||
|
||||
#### Trigger variables
|
||||
|
||||
Variable | Data Type | Description
|
||||
:--- | :--- | :---
|
||||
`ctx.trigger.id` | String | The trigger's ID.
|
||||
`ctx.trigger.name` | String | The trigger's name.
|
||||
`ctx.trigger.severity` | String | The trigger's severity.
|
||||
`ctx.trigger.condition`| JSON | Contains the Painless script used when creating the monitor.
|
||||
`ctx.trigger.condition.script.source` | String | The script used to define the trigger.
|
||||
`ctx.trigger.condition.script.lang` | String | The language used to define the script. Must be painless.
|
||||
`ctx.trigger.actions`| Array | An array with one element that contains information about the action the monitor needs to trigger.
|
||||
|
||||
#### Action variables
|
||||
|
||||
Variable | Data Type | Description
|
||||
:--- | :--- | :---
|
||||
`ctx.trigger.actions.id` | String | The action's ID.
|
||||
`ctx.trigger.actions.name` | String | The action's name.
|
||||
`ctx.trigger.actions.destination_id`| String | The alert destination's ID.
|
||||
`ctx.trigger.actions.message_template.source` | String | The message to send in the alert.
|
||||
`ctx.trigger.actions.message_template.lang` | String | The scripting language used to define the message. Must be Mustache.
|
||||
`ctx.trigger.actions.throttle_enabled` | Boolean | Whether throttling is enabled for this trigger. See [adding actions](#add-actions) for more information about throttling.
|
||||
`ctx.trigger.actions.subject_template.source` | String | The message's subject in the alert.
|
||||
`ctx.trigger.actions.subject_template.lang` | String | The scripting language used to define the subject. Must be Mustache.
|
||||
|
||||
#### Other variables
|
||||
|
||||
Variable | Data Type | Description
|
||||
:--- | :--- | :---
|
||||
`ctx.results` | Array | An array with one element (i.e. `ctx.results[0]`). Contains the query results. This variable is empty if the trigger was unable to retrieve results. See `ctx.error`.
|
||||
`ctx.last_update_time` | Milliseconds | Unix epoch time of when the monitor was last updated.
|
||||
`ctx.periodStart` | String | Unix timestamp for the beginning of the period during which the alert triggered. For example, if a monitor runs every ten minutes, a period might begin at 10:40 and end at 10:50.
|
||||
`ctx.periodEnd` | String | The end of the period during which the alert triggered.
|
||||
`ctx.error` | String | The error message if the trigger was unable to retrieve results or unable to evaluate the trigger, typically due to a compile error or null pointer exception. Null otherwise.
|
||||
`ctx.alert` | JSON | The current, active alert (if it exists). Includes `ctx.alert.id`, `ctx.alert.version`, and `ctx.alert.isAcknowledged`. Null if no alert is active.
|
||||
|
||||
|
||||
---
|
||||
|
||||
## Add actions
|
||||
|
||||
The final step in creating a monitor is to add one or more actions. Actions send notifications when trigger conditions are met and support [Slack](https://slack.com/), [Amazon Chime](https://aws.amazon.com/chime/), and webhooks.
|
||||
|
||||
If you don't want to receive notifications for alerts, you don't have to add actions to your triggers. Instead, you can periodically check OpenSearch Dashboards.
|
||||
{: .tip }
|
||||
|
||||
1. Specify a name for the action.
|
||||
1. Choose a destination.
|
||||
1. Add a subject and body for the message.
|
||||
|
||||
You can add variables to your messages using [Mustache templates](https://mustache.github.io/mustache.5.html). You have access to `ctx.action.name`, the name of the current action, as well as all [trigger variables](#available-variables).
|
||||
|
||||
If your destination is a custom webhook that expects a particular data format, you might need to include JSON (or even XML) directly in the message body:
|
||||
|
||||
```json
|
||||
{% raw %}{ "text": "Monitor {{ctx.monitor.name}} just entered alert status. Please investigate the issue. - Trigger: {{ctx.trigger.name}} - Severity: {{ctx.trigger.severity}} - Period start: {{ctx.periodStart}} - Period end: {{ctx.periodEnd}}" }{% endraw %}
|
||||
```
|
||||
|
||||
In this case, the message content must conform to the `Content-Type` header in the [custom webhook](#create-destinations).
|
||||
|
||||
1. (Optional) Use action throttling to limit the number of notifications you receive within a given span of time.
|
||||
|
||||
For example, if a monitor checks a trigger condition every minute, you could receive one notification per minute. If you set action throttling to 60 minutes, you receive no more than one notification per hour, even if the trigger condition is met dozens of times in that hour.
|
||||
|
||||
1. Choose **Create**.
|
||||
|
||||
After an action sends a message, the content of that message has left the purview of the security plugin. Securing access to the message (e.g. access to the Slack channel) is your responsibility.
|
||||
|
||||
|
||||
#### Sample message
|
||||
|
||||
```mustache
|
||||
{% raw %}Monitor {{ctx.monitor.name}} just entered an alert state. Please investigate the issue.
|
||||
- Trigger: {{ctx.trigger.name}}
|
||||
- Severity: {{ctx.trigger.severity}}
|
||||
- Period start: {{ctx.periodStart}}
|
||||
- Period end: {{ctx.periodEnd}}{% endraw %}
|
||||
```
|
||||
|
||||
If you want to use the `ctx.results` variable in a message, use `{% raw %}{{ctx.results.0}}{% endraw %}` rather than `{% raw %}{{ctx.results[0]}}{% endraw %}`. This difference is due to how Mustache handles bracket notation.
|
||||
{: .note }
|
||||
|
||||
|
||||
---
|
||||
|
||||
## Work with alerts
|
||||
|
||||
Alerts persist until you resolve the root cause and have the following states:
|
||||
|
||||
State | Description
|
||||
:--- | :---
|
||||
Active | The alert is ongoing and unacknowledged. Alerts remain in this state until you acknowledge them, delete the trigger associated with the alert, or delete the monitor entirely.
|
||||
Acknowledged | Someone has acknowledged the alert, but not fixed the root cause.
|
||||
Completed | The alert is no longer ongoing. Alerts enter this state after the corresponding trigger evaluates to false.
|
||||
Error | An error occurred while executing the trigger---usually the result of a bad trigger or destination.
|
||||
Deleted | Someone deleted the monitor or trigger associated with this alert while the alert was ongoing.
|
|
@ -0,0 +1,79 @@
|
|||
---
|
||||
layout: default
|
||||
title: Alerting Security
|
||||
nav_order: 10
|
||||
parent: Alerting
|
||||
has_children: false
|
||||
---
|
||||
|
||||
# Alerting security
|
||||
|
||||
If you use the security plugin alongside alerting, you might want to limit certain users to certain actions. For example, you might want some users to only be able to view and acknowledge alerts, while others can modify monitors and destinations.
|
||||
|
||||
|
||||
## Basic permissions
|
||||
|
||||
The security plugin has three built-in roles that cover most alerting use cases: `alerting_read_access`, `alerting_ack_alerts`, and `alerting_full_access`. For descriptions of each, see [Predefined roles](../../security/access-control/users-roles/#predefined-roles).
|
||||
|
||||
If these roles don't meet your needs, mix and match individual alerting [permissions](../../security/access-control/permissions/) to suit your use case. Each action corresponds to an operation in the REST API. For example, the `cluster:admin/opensearch/alerting/destination/delete` permission lets you delete destinations.
|
||||
|
||||
|
||||
## How monitors access data
|
||||
|
||||
Monitors run with the permissions of the user who created or last modified them. For example, consider the user `jdoe`, who works at a chain of retail stores. `jdoe` has two roles. Together, these two roles allow read access to three indices: `store1-returns`, `store2-returns`, and `store3-returns`.
|
||||
|
||||
`jdoe` creates a monitor that sends an email to management whenever the number of returns across all three indices exceeds 40 per hour.
|
||||
|
||||
Later, the user `psantos` wants to edit the monitor to run every two hours, but `psantos` only has access to `store1-returns`. To make the change, `psantos` has two options:
|
||||
|
||||
- Update the monitor so that it only checks `store1-returns`.
|
||||
- Ask an administrator for read access to the other two indices.
|
||||
|
||||
After making the change, the monitor now runs with the same permissions as `psantos`, including any [document-level security](../../security/access-control/document-level-security/) queries, [excluded fields](../../security/access-control/field-level-security/), and [masked fields](../../security/access-control/field-masking/). If you use an extraction query to define your monitor, use the **Run** button to ensure that the response includes the fields you need.
|
||||
|
||||
|
||||
## (Advanced) Limit access by backend role
|
||||
|
||||
Out of the box, the alerting plugin has no concept of ownership. For example, if you have the `cluster:admin/opensearch/alerting/monitor/write` permission, you can edit *all* monitors, regardless of whether you created them. If a small number of trusted users manage your monitors and destinations, this lack of ownership generally isn't a problem. A larger organization might need to segment access by backend role.
|
||||
|
||||
First, make sure that your users have the appropriate [backend roles](../../security/access-control/). Backend roles usually come from an [LDAP server](../../security/configuration/ldap/) or [SAML provider](../../security/configuration/saml/). However, if you use the internal user database, you can use the REST API to [add them manually](../../security/access-control/api/#create-user).
|
||||
|
||||
Next, enable the following setting:
|
||||
|
||||
```json
|
||||
PUT _cluster/settings
|
||||
{
|
||||
"transient": {
|
||||
"plugins.alerting.filter_by_backend_roles": "true"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Now when users view alerting resources in OpenSearch Dashboards (or make REST API calls), they only see monitors and destinations that are created by users who share *at least one* backend role. For example, consider three users who all have full access to alerting: `jdoe`, `jroe`, and `psantos`.
|
||||
|
||||
`jdoe` and `jroe` are on the same team at work and both have the `analyst` backend role. `psantos` has the `human-resources` backend role.
|
||||
|
||||
If `jdoe` creates a monitor, `jroe` can see and modify it, but `psantos` can't. If that monitor generates an alert, the situation is the same: `jroe` can see and acknowledge it, but `psantos` can't. If `psantos` creates a destination, `jdoe` and `jroe` can't see or modify it.
|
||||
|
||||
|
||||
<!-- ## (Advanced) Limit access by individual
|
||||
|
||||
If you only want users to be able to see and modify their own monitors and destinations, duplicate the `alerting_full_access` role and add the following [DLS query](../../security/access-control/document-level-security/) to it:
|
||||
|
||||
```json
|
||||
{
|
||||
"bool": {
|
||||
"should": [{
|
||||
"match": {
|
||||
"monitor.created_by": "${user.name}"
|
||||
}
|
||||
}, {
|
||||
"match": {
|
||||
"destination.created_by": "${user.name}"
|
||||
}
|
||||
}]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Then, use this new role for all alerting users. -->
|
|
@ -0,0 +1,59 @@
|
|||
---
|
||||
layout: default
|
||||
title: Management
|
||||
parent: Alerting
|
||||
nav_order: 5
|
||||
---
|
||||
|
||||
# Management
|
||||
|
||||
|
||||
## Alerting indices
|
||||
|
||||
The alerting feature creates several indices and one alias. The security plugin demo script configures them as [system indices](../../security/configuration/system-indices/) for an extra layer of protection. Don't delete these indices or modify their contents without using the alerting APIs.
|
||||
|
||||
Index | Purpose
|
||||
:--- | :---
|
||||
`.opendistro-alerting-alerts` | Stores ongoing alerts.
|
||||
`.opendistro-alerting-alert-history-<date>` | Stores a history of completed alerts.
|
||||
`.opendistro-alerting-config` | Stores monitors, triggers, and destinations. [Take a snapshot](../../opensearch/snapshot-restore) of this index to back up your alerting configuration.
|
||||
`.opendistro-alerting-alert-history-write` (alias) | Provides a consistent URI for the `.opendistro-alerting-alert-history-<date>` index.
|
||||
|
||||
All alerting indices are hidden by default. For a summary, make the following request:
|
||||
|
||||
```
|
||||
GET _cat/indices?expand_wildcards=open,hidden
|
||||
```
|
||||
|
||||
|
||||
## Alerting settings
|
||||
|
||||
We don't recommend changing these settings; the defaults should work well for most use cases.
|
||||
|
||||
All settings are available using the OpenSearch `_cluster/settings` API. None require a restart, and all can be marked `persistent` or `transient`.
|
||||
|
||||
Setting | Default | Description
|
||||
:--- | :--- | :---
|
||||
`plugins.scheduled_jobs.enabled` | true | Whether the alerting plugin is enabled or not. If disabled, all monitors immediately stop running.
|
||||
`plugins.alerting.index_timeout` | 60s | The timeout for creating monitors and destinations using the REST APIs.
|
||||
`plugins.alerting.request_timeout` | 10s | The timeout for miscellaneous requests from the plugin.
|
||||
`plugins.alerting.action_throttle_max_value` | 24h | The maximum amount of time you can set for action throttling. By default, this value displays as 1440 minutes in OpenSearch Dashboards.
|
||||
`plugins.alerting.input_timeout` | 30s | How long the monitor can take to issue the search request.
|
||||
`plugins.alerting.bulk_timeout` | 120s | How long the monitor can write alerts to the alert index.
|
||||
`plugins.alerting.alert_backoff_count` | 3 | The number of retries for writing alerts before the operation fails.
|
||||
`plugins.alerting.alert_backoff_millis` | 50ms | The amount of time to wait between retries---increases exponentially after each failed retry.
|
||||
`plugins.alerting.alert_history_rollover_period` | 12h | How frequently to check whether the `.opendistro-alerting-alert-history-write` alias should roll over to a new history index and whether the Alerting plugin should delete any history indices.
|
||||
`plugins.alerting.move_alerts_backoff_millis` | 250 | The amount of time to wait between retries---increases exponentially after each failed retry.
|
||||
`plugins.alerting.move_alerts_backoff_count` | 3 | The number of retries for moving alerts to a deleted state after their monitor or trigger has been deleted.
|
||||
`plugins.alerting.monitor.max_monitors` | 1000 | The maximum number of monitors users can create.
|
||||
`plugins.alerting.alert_history_max_age` | 30d | The oldest document to store in the `.opendistro-alerting-alert-history-<date>` index before creating a new index. If the number of alerts in this time period does not exceed `alert_history_max_docs`, alerting creates one history index per period (e.g. one index every 30 days).
|
||||
`plugins.alerting.alert_history_max_docs` | 1000 | The maximum number of alerts to store in the `.opendistro-alerting-alert-history-<date>` index before creating a new index.
|
||||
`plugins.alerting.alert_history_enabled` | true | Whether to create `.opendistro-alerting-alert-history-<date>` indices.
|
||||
`plugins.alerting.alert_history_retention_period` | 60d | The amount of time to keep history indices before automatically deleting them.
|
||||
`plugins.alerting.destination.allow_list` | ["chime", "slack", "custom_webhook", "email", "test_action"] | The list of allowed destinations. If you don't want to allow users to use a certain type of destination, you can remove it from this list, but we recommend leaving this setting as-is.
|
||||
`plugins.alerting.filter_by_backend_roles` | "false" | Restricts access to monitors by backend role. See [Alerting security](../security/).
|
||||
`plugins.scheduled_jobs.sweeper.period` | 5m | The alerting feature uses its "job sweeper" component to periodically check for new or updated jobs. This setting is the rate at which the sweeper checks to see if any jobs (monitors) have changed and need to be rescheduled.
|
||||
`plugins.scheduled_jobs.sweeper.page_size` | 100 | The page size for the sweeper. You shouldn't need to change this value.
|
||||
`plugins.scheduled_jobs.sweeper.backoff_millis` | 50ms | The amount of time the sweeper waits between retries---increases exponentially after each failed retry.
|
||||
`plugins.scheduled_jobs.sweeper.retry_count` | 3 | The total number of times the sweeper should retry before throwing an error.
|
||||
`plugins.scheduled_jobs.request_timeout` | 10s | The timeout for the request that sweeps shards for jobs.
|
|
@ -0,0 +1,251 @@
|
|||
---
|
||||
layout: default
|
||||
title: Asynchronous search
|
||||
nav_order: 51
|
||||
has_children: true
|
||||
---
|
||||
|
||||
# Asynchronous search
|
||||
|
||||
Searching large volumes of data can take a long time, especially if you're searching across warm nodes or multiple remote clusters.
|
||||
|
||||
Asynchronous search in OpenSearch lets you send search requests that run in the background. You can monitor the progress of these searches and get back partial results as they become available. After the search finishes, you can save the results to examine at a later time.
|
||||
|
||||
## REST API
|
||||
|
||||
To perform an asynchronous search, send requests to `_opensearch/_asynchronous_search`, with your query in the request body:
|
||||
|
||||
```json
|
||||
POST _opensearch/_asynchronous_search
|
||||
```
|
||||
|
||||
You can specify the following options.
|
||||
|
||||
Options | Description | Default value | Required
|
||||
:--- | :--- |:--- |:--- |
|
||||
`wait_for_completion_timeout` | The amount of time that you plan to wait for the results. You can see whatever results you get within this time just like in a normal search. You can poll the remaining results based on an ID. The maximum value is 300 seconds. | 1 second | No
|
||||
`keep_on_completion` | Whether you want to save the results in the cluster after the search is complete. You can examine the stored results at a later time. | `false` | No
|
||||
`keep_alive` | The amount of time that the result is saved in the cluster. For example, `2d` means that the results are stored in the cluster for 48 hours. The saved search results are deleted after this period or if the search is canceled. Note that this includes the query execution time. If the query overruns this time, the process cancels this query automatically. | 12 hours | No
|
||||
|
||||
#### Sample request
|
||||
|
||||
```json
|
||||
POST _opensearch/_asynchronous_search/?pretty&size=10&wait_for_completion_timeout=1ms&keep_on_completion=true&request_cache=false
|
||||
{
|
||||
"aggs": {
|
||||
"city": {
|
||||
"terms": {
|
||||
"field": "city",
|
||||
"size": 10
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
#### Sample response
|
||||
|
||||
```json
|
||||
{
|
||||
"id": "FklfVlU4eFdIUTh1Q1hyM3ZnT19fUVEUd29KLWZYUUI3TzRpdU5wMjRYOHgAAAAAAAAABg==",
|
||||
"state": "RUNNING",
|
||||
"start_time_in_millis": 1599833301297,
|
||||
"expiration_time_in_millis": 1600265301297,
|
||||
"response": {
|
||||
"took": 15,
|
||||
"timed_out": false,
|
||||
"terminated_early": false,
|
||||
"num_reduce_phases": 4,
|
||||
"_shards": {
|
||||
"total": 21,
|
||||
"successful": 4,
|
||||
"skipped": 0,
|
||||
"failed": 0
|
||||
},
|
||||
"hits": {
|
||||
"total": {
|
||||
"value": 807,
|
||||
"relation": "eq"
|
||||
},
|
||||
"max_score": null,
|
||||
"hits": []
|
||||
},
|
||||
"aggregations": {
|
||||
"city": {
|
||||
"doc_count_error_upper_bound": 16,
|
||||
"sum_other_doc_count": 403,
|
||||
"buckets": [
|
||||
{
|
||||
"key": "downsville",
|
||||
"doc_count": 1
|
||||
},
|
||||
....
|
||||
....
|
||||
....
|
||||
{
|
||||
"key": "blairstown",
|
||||
"doc_count": 1
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
#### Response parameters
|
||||
|
||||
Options | Description
|
||||
:--- | :---
|
||||
`id` | The ID of an asynchronous search. Use this ID to monitor the progress of the search, get its partial results, and/or delete the results. If the asynchronous search finishes within the timeout period, the response doesn't include the ID because the results aren't stored in the cluster.
|
||||
`state` | Specifies whether the search is still running or if it has finished, and if the results persist in the cluster. The possible states are `RUNNING`, `COMPLETED`, and `PERSISTED`.
|
||||
`start_time_in_millis` | The start time in milliseconds.
|
||||
`expiration_time_in_millis` | The expiration time in milliseconds.
|
||||
`took` | The total time that the search has been running.
|
||||
`response` | The actual search response.
|
||||
`num_reduce_phases` | The number of times that the coordinating node aggregates results from batches of shard responses (5 by default). If this number increases compared to the last retrieved results, you can expect additional results to be included in the search response.
|
||||
`total` | The total number of shards that run the search.
|
||||
`successful` | The number of shard responses that the coordinating node received successfully.
|
||||
`aggregations` | The partial aggregation results that have been completed by the shards so far.
|
||||
|
||||
## Get partial results
|
||||
|
||||
After you submit an asynchronous search request, you can request partial responses with the ID that you see in the asynchronous search response.
|
||||
|
||||
```json
|
||||
GET _opensearch/_asynchronous_search/<ID>?pretty
|
||||
```
|
||||
|
||||
#### Sample response
|
||||
|
||||
```json
|
||||
{
|
||||
"id": "Fk9lQk5aWHJIUUltR2xGWnpVcWtFdVEURUN1SWZYUUJBVkFVMEJCTUlZUUoAAAAAAAAAAg==",
|
||||
"state": "STORE_RESIDENT",
|
||||
"start_time_in_millis": 1599833907465,
|
||||
"expiration_time_in_millis": 1600265907465,
|
||||
"response": {
|
||||
"took": 83,
|
||||
"timed_out": false,
|
||||
"_shards": {
|
||||
"total": 20,
|
||||
"successful": 20,
|
||||
"skipped": 0,
|
||||
"failed": 0
|
||||
},
|
||||
"hits": {
|
||||
"total": {
|
||||
"value": 1000,
|
||||
"relation": "eq"
|
||||
},
|
||||
"max_score": 1,
|
||||
"hits": [
|
||||
{
|
||||
"_index": "bank",
|
||||
"_type": "_doc",
|
||||
"_id": "1",
|
||||
"_score": 1,
|
||||
"_source": {
|
||||
"email": "amberduke@abc.com",
|
||||
"city": "Brogan",
|
||||
"state": "IL"
|
||||
}
|
||||
},
|
||||
{....}
|
||||
]
|
||||
},
|
||||
"aggregations": {
|
||||
"city": {
|
||||
"doc_count_error_upper_bound": 0,
|
||||
"sum_other_doc_count": 997,
|
||||
"buckets": [
|
||||
{
|
||||
"key": "belvoir",
|
||||
"doc_count": 2
|
||||
},
|
||||
{
|
||||
"key": "aberdeen",
|
||||
"doc_count": 1
|
||||
},
|
||||
{
|
||||
"key": "abiquiu",
|
||||
"doc_count": 1
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
After the response is successfully persisted, you get back the `STORE_RESIDENT` state in the response.
|
||||
|
||||
You can poll the ID with the `wait_for_completion_timeout` parameter to wait for the results received for the time that you specify.
|
||||
|
||||
For asynchronous searches with `keep_on_completion` as `true` and a sufficiently long `keep_alive` time, you can keep polling the IDs until the search finishes. If you don’t want to periodically poll each ID, you can retain the results in your cluster with the `keep_alive` parameter and come back to them at a later time.
|
||||
|
||||
## Delete searches and results
|
||||
|
||||
You can use the DELETE API operation to delete any ongoing asynchronous search by its ID. If the search is still running, it’s canceled. If the search is complete, the saved search results are deleted.
|
||||
|
||||
```json
|
||||
DELETE _opensearch/_asynchronous_search/<ID>?pretty
|
||||
```
|
||||
|
||||
#### Sample response
|
||||
|
||||
```json
|
||||
{
|
||||
"acknowledged": "true"
|
||||
}
|
||||
```
|
||||
|
||||
## Monitor stats
|
||||
|
||||
You can use the stats API operation to monitor asynchronous searches that are running, completed, and/or persisted.
|
||||
|
||||
```json
|
||||
GET _opensearch/_asynchronous_search/stats
|
||||
```
|
||||
|
||||
#### Sample response
|
||||
|
||||
```json
|
||||
{
|
||||
"_nodes": {
|
||||
"total": 8,
|
||||
"successful": 8,
|
||||
"failed": 0
|
||||
},
|
||||
"cluster_name": "264071961897:asynchronous-search",
|
||||
"nodes": {
|
||||
"JKEFl6pdRC-xNkKQauy7Yg": {
|
||||
"asynchronous_search_stats": {
|
||||
"submitted": 18236,
|
||||
"initialized": 112,
|
||||
"search_failed": 56,
|
||||
"search_completed": 56,
|
||||
"rejected": 18124,
|
||||
"persist_failed": 0,
|
||||
"cancelled": 1,
|
||||
"running_current": 399,
|
||||
"persisted": 100
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
#### Response parameters
|
||||
|
||||
Options | Description
|
||||
:--- | :---
|
||||
`submitted` | The number of asynchronous search requests that were submitted.
|
||||
`initialized` | The number of asynchronous search requests that were initialized.
|
||||
`rejected` | The number of asynchronous search requests that were rejected.
|
||||
`search_completed` | The number of asynchronous search requests that completed with a successful response.
|
||||
`search_failed` | The number of asynchronous search requests that completed with a failed response.
|
||||
`persisted` | The number of asynchronous search requests whose final result successfully persisted in the cluster.
|
||||
`persist_failed` | The number of asynchronous search requests whose final result failed to persist in the cluster.
|
||||
`running_current` | The number of asynchronous search requests that are running on a given coordinator node.
|
||||
`cancelled` | The number of asynchronous search requests that were canceled while the search was running.
|
|
@ -0,0 +1,76 @@
|
|||
---
|
||||
layout: default
|
||||
title: Asynchronous search security
|
||||
nav_order: 2
|
||||
parent: Asynchronous search
|
||||
has_children: false
|
||||
---
|
||||
|
||||
# Asynchronous search security
|
||||
|
||||
You can use the security plugin with asynchronous searches to limit non-admin users to specific actions. For example, you might want some users to only be able to submit or delete asynchronous searches, while you might want others to only view the results.
|
||||
|
||||
All asynchronous search indices are protected as system indices. Only a super admin user or an admin user with a Transport Layer Security (TLS) certificate can access system indices. For more information, see [System indices](../../security/configuration/system-indices/).
|
||||
|
||||
## Basic permissions
|
||||
|
||||
As an admin user, you can use the security plugin to assign specific permissions to users based on which API operations they need access to. For a list of supported API operations, see [Asynchronous search](../).
|
||||
|
||||
The security plugin has two built-in roles that cover most asynchronous search use cases: `asynchronous_search_full_access` and `asynchronous_search_read_access`. For descriptions of each, see [Predefined roles](../../security/access-control/users-roles/#predefined-roles).
|
||||
|
||||
If these roles don’t meet your needs, mix and match individual asynchronous search permissions to suit your use case. Each action corresponds to an operation in the REST API. For example, the `cluster:admin/opensearch/asynchronous_search/delete` permission lets you delete a previously submitted asynchronous search.
|
||||
|
||||
## (Advanced) Limit access by backend role
|
||||
|
||||
Use backend roles to configure fine-grained access to asynchronous searches based on roles. For example, users of different departments in an organization can view asynchronous searches owned by their own department.
|
||||
|
||||
First, make sure your users have the appropriate [backend roles](../../security/access-control/). Backend roles usually come from an [LDAP server](../../security/configuration/ldap/) or [SAML provider](../../security/configuration/saml/). However, if you use the internal user database, you can use the REST API to [add them manually](../../security/access-control/api/#create-user).
|
||||
|
||||
Now when users view asynchronous search resources in OpenSearch Dashboards (or make REST API calls), they only see asynchronous searches submitted by users whose backend roles are a subset of their own.
|
||||
For example, consider two users: `judy` and `elon`.
|
||||
|
||||
`judy` has an IT backend role:
|
||||
|
||||
```json
|
||||
PUT _opensearch/_security/api/internalusers/judy
|
||||
{
|
||||
"password": "judy",
|
||||
"backend_roles": [
|
||||
"IT"
|
||||
],
|
||||
"attributes": {}
|
||||
}
|
||||
```
|
||||
|
||||
`elon` has an admin backend role:
|
||||
|
||||
```json
|
||||
PUT _opensearch/_security/api/internalusers/elon
|
||||
{
|
||||
"password": "elon",
|
||||
"backend_roles": [
|
||||
"admin"
|
||||
],
|
||||
"attributes": {}
|
||||
}
|
||||
```
|
||||
|
||||
Both `judy` and `elon` have full access to asynchronous search:
|
||||
|
||||
```json
|
||||
PUT _opensearch/_security/api/rolesmapping/asynchronous_search_full_access
|
||||
{
|
||||
"backend_roles": [],
|
||||
"hosts": [],
|
||||
"users": [
|
||||
"judy",
|
||||
"elon"
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
Because they have different backend roles, an asynchronous search submitted by `judy` will not be visible to `elon` and vice versa.
|
||||
|
||||
`judy` needs to have at least the superset of all roles that `elon` has to see `elon`'s asynchronous searches.
|
||||
|
||||
For example, if `judy` has five backend roles and `elon` has one of these roles, then `judy` can see asynchronous searches submitted by `elon`, but `elon` can’t see the asynchronous searches submitted by `judy`. This means that `judy` can perform GET and DELETE operations on asynchronous searches submitted by `elon`, but not the reverse.
|
|
@ -0,0 +1,29 @@
|
|||
---
|
||||
layout: default
|
||||
title: Settings
|
||||
parent: Asynchronous search
|
||||
nav_order: 4
|
||||
---
|
||||
|
||||
# Settings
|
||||
|
||||
The asynchronous search plugin adds several settings to the standard OpenSearch cluster settings. They are dynamic, so you can change the default behavior of the plugin without restarting your cluster. You can mark the settings as `persistent` or `transient`.
|
||||
|
||||
For example, to update the maximum value allowed for the `wait_for_completion_timeout` parameter:
|
||||
|
||||
```json
|
||||
PUT _cluster/settings
|
||||
{
|
||||
"transient": {
|
||||
"opensearch.asynchronous_search.max_wait_for_completion_timeout": "5m"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Setting | Default | Description
|
||||
:--- | :--- | :---
|
||||
`opensearch.asynchronous_search.max_search_running_time` | 12 hours | The maximum running time for the search beyond which the search is terminated.
|
||||
`opensearch.asynchronous_search.node_concurrent_running_searches` | 20 | The concurrent searches running per coordinator node.
|
||||
`opensearch.asynchronous_search.max_keep_alive` | 5 days | The maximum amount of time that search results can be stored in the cluster.
|
||||
`opensearch.asynchronous_search.max_wait_for_completion_timeout` | 1 minute | The maximum value for the `wait_for_completion_timeout` parameter.
|
||||
`opensearch.asynchronous_search.persist_search_failures` | false | Persist asynchronous search results that end with a search failure in the system index.
|
|
@ -0,0 +1,100 @@
|
|||
---
|
||||
layout: default
|
||||
title: OpenSearch CLI
|
||||
nav_order: 52
|
||||
has_children: false
|
||||
---
|
||||
|
||||
# OpenSearch CLI
|
||||
|
||||
The OpenSearch command line interface (opensearch-cli) lets you manage your OpenSearch cluster from the command line and automate tasks.
|
||||
|
||||
Currently, opensearch-cli supports the [Anomaly Detection](../ad/) and [k-NN](../knn/) plugins, along with arbitrary REST API paths. Among other things, you can use opensearch-cli to create and delete detectors, start and stop them, and check k-NN statistics.
|
||||
|
||||
Profiles let you easily access different clusters or sign requests with different credentials. opensearch-cli supports unauthenticated requests, HTTP basic signing, and IAM signing for Amazon Web Services.
|
||||
|
||||
This example moves a detector (`ecommerce-count-quantity`) from a staging cluster to a production cluster:
|
||||
|
||||
```bash
|
||||
opensearch-cli ad get ecommerce-count-quantity --profile staging > ecommerce-count-quantity.json
|
||||
opensearch-cli ad create ecommerce-count-quantity.json --profile production
|
||||
opensearch-cli ad start ecommerce-count-quantity --profile production
|
||||
opensearch-cli ad stop ecommerce-count-quantity --profile staging
|
||||
opensearch-cli ad delete ecommerce-count-quantity --profile staging
|
||||
```
|
||||
|
||||
|
||||
## Install
|
||||
|
||||
1. [Download](https://opensearch.org/downloads.html){:target='\_blank'} and extract the appropriate installation package for your computer.
|
||||
|
||||
1. Make the `opensearch-cli` file executable:
|
||||
|
||||
```bash
|
||||
chmod +x ./opensearch-cli
|
||||
```
|
||||
|
||||
1. Add the command to your path:
|
||||
|
||||
```bash
|
||||
export PATH=$PATH:$(pwd)
|
||||
```
|
||||
|
||||
1. Confirm the CLI is working properly:
|
||||
|
||||
```bash
|
||||
opensearch-cli --version
|
||||
```
|
||||
|
||||
|
||||
## Profiles
|
||||
|
||||
Profiles let you easily switch between different clusters and user credentials. To get started, run `opensearch-cli profile create` with the `--auth-type`, `--endpoint`, and `--name` options:
|
||||
|
||||
```bash
|
||||
opensearch-cli profile create --auth-type basic --endpoint https://localhost:9200 --name docker-local
|
||||
```
|
||||
|
||||
Alternatively, save a configuration file to `~/.opensearch-cli/config.yaml`:
|
||||
|
||||
```yaml
|
||||
profiles:
|
||||
- name: docker-local
|
||||
endpoint: https://localhost:9200
|
||||
user: admin
|
||||
password: foobar
|
||||
- name: aws
|
||||
endpoint: https://some-cluster.us-east-1.es.amazonaws.com
|
||||
aws_iam:
|
||||
profile: ""
|
||||
service: es
|
||||
```
|
||||
|
||||
|
||||
## Usage
|
||||
|
||||
opensearch-cli commands use the following syntax:
|
||||
|
||||
```bash
|
||||
opensearch-cli <command> <subcommand> <flags>
|
||||
```
|
||||
|
||||
For example, the following command retrieves information about a detector:
|
||||
|
||||
```bash
|
||||
opensearch-cli ad get my-detector --profile docker-local
|
||||
```
|
||||
|
||||
For a request to the OpenSearch CAT API, try the following command:
|
||||
|
||||
```bash
|
||||
opensearch-cli curl get --path _cat/plugins --profile aws
|
||||
```
|
||||
|
||||
Use the `-h` or `--help` flag to see all supported commands, subcommands, or usage for a specific command:
|
||||
|
||||
```bash
|
||||
opensearch-cli -h
|
||||
opensearch-cli ad -h
|
||||
opensearch-cli ad get -h
|
||||
```
|
|
@ -0,0 +1,457 @@
|
|||
---
|
||||
layout: default
|
||||
title: Index Rollups
|
||||
nav_order: 35
|
||||
parent: Index management
|
||||
has_children: true
|
||||
redirect_from: /docs/ism/index-rollups/
|
||||
has_toc: false
|
||||
---
|
||||
|
||||
# Index Rollups
|
||||
|
||||
Time series data increases storage costs, strains cluster health, and slows down aggregations over time. Index rollup lets you periodically reduce data granularity by rolling up old data into summarized indices.
|
||||
|
||||
You pick the fields that interest you and use index rollup to create a new index with only those fields aggregated into coarser time buckets. You can store months or years of historical data at a fraction of the cost with the same query performance.
|
||||
|
||||
For example, say you collect CPU consumption data every five seconds and store it on a hot node. Instead of moving older data to a read-only warm node, you can roll up or compress this data with only the average CPU consumption per day or with a 10% decrease in its interval every week.
|
||||
|
||||
You can use index rollup in three ways:
|
||||
|
||||
1. Use the index rollup API for an on-demand index rollup job that operates on an index that's not being actively ingested, such as a rolled-over index. For example, you can perform an index rollup operation to reduce data collected at a five-minute interval to a weekly average for trend analysis.
|
||||
2. Use the OpenSearch Dashboards UI to create an index rollup job that runs on a defined schedule. You can also set it up to roll up your indices as they’re being actively ingested. For example, you can continuously roll up Logstash indices from a five-second interval to a one-hour interval.
|
||||
3. Specify the index rollup job as an ISM action for complete index management. This allows you to roll up an index after a certain event such as a rollover, index age reaching a certain point, index becoming read-only, and so on. You can also have rollover and index rollup jobs running in sequence, where the rollover first moves the current index to a warm node and then the index rollup job creates a new index with the minimized data on the hot node.
|
||||
|
||||
## Create an Index Rollup Job
|
||||
|
||||
To get started, choose **Index Management** in OpenSearch Dashboards.
|
||||
Select **Rollup Jobs** and choose **Create rollup job**.
|
||||
|
||||
### Step 1: Set up indices
|
||||
|
||||
1. In the **Job name and description** section, specify a unique name and an optional description for the index rollup job.
|
||||
2. In the **Indices** section, select the source and target index. The source index is the one that you want to roll up. The source index remains as is; the index rollup job creates a new index, referred to as the target index, where the index rollup results are saved. For the target index, you can either type in a name for a new index or select an existing index.
|
||||
3. Choose **Next**.
|
||||
|
||||
After you create an index rollup job, you can't change your index selections.
|
||||
|
||||
### Step 2: Define aggregations and metrics
|
||||
|
||||
Select the attributes with the aggregations (terms and histograms) and metrics (avg, sum, max, min, and value count) that you want to roll up. Make sure you don’t add a lot of highly granular attributes, because you won’t save much space.
|
||||
|
||||
For example, consider a dataset of cities and demographics within those cities. You can aggregate based on cities and specify demographics within a city as metrics.
|
||||
The order in which you select attributes is critical. A city followed by a demographic is different from a demographic followed by a city.
|
||||
|
||||
1. In the **Time aggregation** section, select a timestamp field. Choose between a **Fixed** or **Calendar** interval type and specify the interval and timezone. The index rollup job uses this information to create a date histogram for the timestamp field.
|
||||
2. (Optional) Add additional aggregations for each field. You can choose terms aggregation for all field types and histogram aggregation only for numeric fields.
|
||||
3. (Optional) Add additional metrics for each field. You can choose between **All**, **Min**, **Max**, **Sum**, **Avg**, or **Value Count**.
|
||||
4. Choose **Next**.
|
||||
|
||||
### Step 3: Specify schedule
|
||||
|
||||
Specify a schedule to roll up your indices as they’re being ingested. The index rollup job is enabled by default.
|
||||
|
||||
1. Specify if the data is continuous or not.
|
||||
2. For roll up execution frequency, select **Define by fixed interval** and specify the **Rollup interval** and the time unit or **Define by cron expression** and add in a cron expression to select the interval. To learn how to define a cron expression, see [Alerting](../alerting/cron/).
|
||||
3. Specify the number of pages per execution process. A larger number means faster execution and more cost for memory.
|
||||
4. (Optional) Add a delay to the roll up executions. This is the amount of time the job waits for data ingestion to accommodate any processing time. For example, if you set this value to 10 minutes, an index rollup that executes at 2 PM to roll up 1 PM to 2 PM of data starts at 2:10 PM.
|
||||
5. Choose **Next**.
|
||||
|
||||
### Step 4: Review and create
|
||||
|
||||
Review your configuration and select **Create**.
|
||||
|
||||
### Step 5: Search the target index
|
||||
|
||||
You can use the standard `_search` API to search the target index. Make sure that the query matches the constraints of the target index. For example, if you don’t set up terms aggregations on a field, you don’t receive results for terms aggregations. If you don’t set up the maximum aggregations, you don’t receive results for maximum aggregations.
|
||||
|
||||
You can’t access the internal structure of the data in the target index because the plugin automatically rewrites the query in the background to suit the target index. This is to make sure you can use the same query for the source and target index.
|
||||
|
||||
To query the target index, set `size` to 0:
|
||||
|
||||
```json
|
||||
GET target_index/_search
|
||||
{
|
||||
"size": 0,
|
||||
"query": {
|
||||
"match_all": {}
|
||||
},
|
||||
"aggs": {
|
||||
"avg_cpu": {
|
||||
"avg": {
|
||||
"field": "cpu_usage"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Consider a scenario where you collect rolled up data from 1 PM to 9 PM in hourly intervals and live data from 7 PM to 11 PM in minutely intervals. If you execute an aggregation over these in the same query, for 7 PM to 9 PM, you see an overlap of both rolled up data and live data because they get counted twice in the aggregations.
|
||||
|
||||
## Sample Walkthrough
|
||||
|
||||
This walkthrough uses the OpenSearch Dashboards sample e-commerce data. To add that sample data, log in to OpenSearch Dashboards, choose **Home** and **Try our sample data**. For **Sample eCommerce orders**, choose **Add data**.
|
||||
|
||||
Then run a search:
|
||||
|
||||
```json
|
||||
GET opensearch_dashboards_sample_data_ecommerce/_search
|
||||
```
|
||||
|
||||
#### Sample response
|
||||
|
||||
```json
|
||||
{
|
||||
"took": 23,
|
||||
"timed_out": false,
|
||||
"_shards": {
|
||||
"total": 1,
|
||||
"successful": 1,
|
||||
"skipped": 0,
|
||||
"failed": 0
|
||||
},
|
||||
"hits": {
|
||||
"total": {
|
||||
"value": 4675,
|
||||
"relation": "eq"
|
||||
},
|
||||
"max_score": 1,
|
||||
"hits": [
|
||||
{
|
||||
"_index": "opensearch_dashboards_sample_data_ecommerce",
|
||||
"_type": "_doc",
|
||||
"_id": "jlMlwXcBQVLeQPrkC_kQ",
|
||||
"_score": 1,
|
||||
"_source": {
|
||||
"category": [
|
||||
"Women's Clothing",
|
||||
"Women's Accessories"
|
||||
],
|
||||
"currency": "EUR",
|
||||
"customer_first_name": "Selena",
|
||||
"customer_full_name": "Selena Mullins",
|
||||
"customer_gender": "FEMALE",
|
||||
"customer_id": 42,
|
||||
"customer_last_name": "Mullins",
|
||||
"customer_phone": "",
|
||||
"day_of_week": "Saturday",
|
||||
"day_of_week_i": 5,
|
||||
"email": "selena@mullins-family.zzz",
|
||||
"manufacturer": [
|
||||
"Tigress Enterprises"
|
||||
],
|
||||
"order_date": "2021-02-27T03:56:10+00:00",
|
||||
"order_id": 581553,
|
||||
"products": [
|
||||
{
|
||||
"base_price": 24.99,
|
||||
"discount_percentage": 0,
|
||||
"quantity": 1,
|
||||
"manufacturer": "Tigress Enterprises",
|
||||
"tax_amount": 0,
|
||||
"product_id": 19240,
|
||||
"category": "Women's Clothing",
|
||||
"sku": "ZO0064500645",
|
||||
"taxless_price": 24.99,
|
||||
"unit_discount_amount": 0,
|
||||
"min_price": 12.99,
|
||||
"_id": "sold_product_581553_19240",
|
||||
"discount_amount": 0,
|
||||
"created_on": "2016-12-24T03:56:10+00:00",
|
||||
"product_name": "Blouse - port royal",
|
||||
"price": 24.99,
|
||||
"taxful_price": 24.99,
|
||||
"base_unit_price": 24.99
|
||||
},
|
||||
{
|
||||
"base_price": 10.99,
|
||||
"discount_percentage": 0,
|
||||
"quantity": 1,
|
||||
"manufacturer": "Tigress Enterprises",
|
||||
"tax_amount": 0,
|
||||
"product_id": 17221,
|
||||
"category": "Women's Accessories",
|
||||
"sku": "ZO0085200852",
|
||||
"taxless_price": 10.99,
|
||||
"unit_discount_amount": 0,
|
||||
"min_price": 5.06,
|
||||
"_id": "sold_product_581553_17221",
|
||||
"discount_amount": 0,
|
||||
"created_on": "2016-12-24T03:56:10+00:00",
|
||||
"product_name": "Snood - rose",
|
||||
"price": 10.99,
|
||||
"taxful_price": 10.99,
|
||||
"base_unit_price": 10.99
|
||||
}
|
||||
],
|
||||
"sku": [
|
||||
"ZO0064500645",
|
||||
"ZO0085200852"
|
||||
],
|
||||
"taxful_total_price": 35.98,
|
||||
"taxless_total_price": 35.98,
|
||||
"total_quantity": 2,
|
||||
"total_unique_products": 2,
|
||||
"type": "order",
|
||||
"user": "selena",
|
||||
"geoip": {
|
||||
"country_iso_code": "MA",
|
||||
"location": {
|
||||
"lon": -8,
|
||||
"lat": 31.6
|
||||
},
|
||||
"region_name": "Marrakech-Tensift-Al Haouz",
|
||||
"continent_name": "Africa",
|
||||
"city_name": "Marrakesh"
|
||||
},
|
||||
"event": {
|
||||
"dataset": "sample_ecommerce"
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
...
|
||||
```
|
||||
|
||||
Create an index rollup job.
|
||||
This example picks the `order_date`, `customer_gender`, `geoip.city_name`, `geoip.region_name`, and `day_of_week` fields and rolls them into an `example_rollup` target index:
|
||||
|
||||
```json
|
||||
PUT _plugins/_rollup/jobs/example
|
||||
{
|
||||
"rollup": {
|
||||
"enabled": true,
|
||||
"schedule": {
|
||||
"interval": {
|
||||
"period": 1,
|
||||
"unit": "Minutes",
|
||||
"start_time": 1602100553
|
||||
}
|
||||
},
|
||||
"last_updated_time": 1602100553,
|
||||
"description": "An example policy that rolls up the sample ecommerce data",
|
||||
"source_index": "opensearch_dashboards_sample_data_ecommerce",
|
||||
"target_index": "example_rollup",
|
||||
"page_size": 1000,
|
||||
"delay": 0,
|
||||
"continuous": false,
|
||||
"dimensions": [
|
||||
{
|
||||
"date_histogram": {
|
||||
"source_field": "order_date",
|
||||
"fixed_interval": "60m",
|
||||
"timezone": "America/Los_Angeles"
|
||||
}
|
||||
},
|
||||
{
|
||||
"terms": {
|
||||
"source_field": "customer_gender"
|
||||
}
|
||||
},
|
||||
{
|
||||
"terms": {
|
||||
"source_field": "geoip.city_name"
|
||||
}
|
||||
},
|
||||
{
|
||||
"terms": {
|
||||
"source_field": "geoip.region_name"
|
||||
}
|
||||
},
|
||||
{
|
||||
"terms": {
|
||||
"source_field": "day_of_week"
|
||||
}
|
||||
}
|
||||
],
|
||||
"metrics": [
|
||||
{
|
||||
"source_field": "taxless_total_price",
|
||||
"metrics": [
|
||||
{
|
||||
"avg": {}
|
||||
},
|
||||
{
|
||||
"sum": {}
|
||||
},
|
||||
{
|
||||
"max": {}
|
||||
},
|
||||
{
|
||||
"min": {}
|
||||
},
|
||||
{
|
||||
"value_count": {}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"source_field": "total_quantity",
|
||||
"metrics": [
|
||||
{
|
||||
"avg": {}
|
||||
},
|
||||
{
|
||||
"max": {}
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
You can query the `example_rollup` index for the terms aggregations on the fields set up in the rollup job.
|
||||
You get back the same response that you would on the original `opensearch_dashboards_sample_data_ecommerce` source index.
|
||||
|
||||
```json
|
||||
POST example_rollup/_search
|
||||
{
|
||||
"size": 0,
|
||||
"query": {
|
||||
"bool": {
|
||||
"must": {"term": { "geoip.region_name": "California" } }
|
||||
}
|
||||
},
|
||||
"aggregations": {
|
||||
"daily_numbers": {
|
||||
"terms": {
|
||||
"field": "day_of_week"
|
||||
},
|
||||
"aggs": {
|
||||
"per_city": {
|
||||
"terms": {
|
||||
"field": "geoip.city_name"
|
||||
},
|
||||
"aggregations": {
|
||||
"average quantity": {
|
||||
"avg": {
|
||||
"field": "total_quantity"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"total_revenue": {
|
||||
"sum": {
|
||||
"field": "taxless_total_price"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
#### Sample response
|
||||
|
||||
```json
|
||||
{
|
||||
"took": 476,
|
||||
"timed_out": false,
|
||||
"_shards": {
|
||||
"total": 1,
|
||||
"successful": 1,
|
||||
"skipped": 0,
|
||||
"failed": 0
|
||||
},
|
||||
"hits": {
|
||||
"total": {
|
||||
"value": 281,
|
||||
"relation": "eq"
|
||||
},
|
||||
"max_score": null,
|
||||
"hits": []
|
||||
},
|
||||
"aggregations": {
|
||||
"daily_numbers": {
|
||||
"doc_count_error_upper_bound": 0,
|
||||
"sum_other_doc_count": 0,
|
||||
"buckets": [
|
||||
{
|
||||
"key": "Friday",
|
||||
"doc_count": 53,
|
||||
"total_revenue": {
|
||||
"value": 4858.84375
|
||||
},
|
||||
"per_city": {
|
||||
"doc_count_error_upper_bound": 0,
|
||||
"sum_other_doc_count": 0,
|
||||
"buckets": [
|
||||
{
|
||||
"key": "Los Angeles",
|
||||
"doc_count": 53,
|
||||
"average quantity": {
|
||||
"value": 2.305084745762712
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"key": "Saturday",
|
||||
"doc_count": 43,
|
||||
"total_revenue": {
|
||||
"value": 3547.203125
|
||||
},
|
||||
"per_city": {
|
||||
"doc_count_error_upper_bound": 0,
|
||||
"sum_other_doc_count": 0,
|
||||
"buckets": [
|
||||
{
|
||||
"key": "Los Angeles",
|
||||
"doc_count": 43,
|
||||
"average quantity": {
|
||||
"value": 2.260869565217391
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"key": "Tuesday",
|
||||
"doc_count": 42,
|
||||
"total_revenue": {
|
||||
"value": 3983.28125
|
||||
},
|
||||
"per_city": {
|
||||
"doc_count_error_upper_bound": 0,
|
||||
"sum_other_doc_count": 0,
|
||||
"buckets": [
|
||||
{
|
||||
"key": "Los Angeles",
|
||||
"doc_count": 42,
|
||||
"average quantity": {
|
||||
"value": 2.2888888888888888
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"key": "Sunday",
|
||||
"doc_count": 40,
|
||||
"total_revenue": {
|
||||
"value": 3308.1640625
|
||||
},
|
||||
"per_city": {
|
||||
"doc_count_error_upper_bound": 0,
|
||||
"sum_other_doc_count": 0,
|
||||
"buckets": [
|
||||
{
|
||||
"key": "Los Angeles",
|
||||
"doc_count": 40,
|
||||
"average quantity": {
|
||||
"value": 2.090909090909091
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
...
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
|
@ -0,0 +1,235 @@
|
|||
---
|
||||
layout: default
|
||||
title: Index Rollups API
|
||||
parent: Index Rollups
|
||||
grand_parent: Index management
|
||||
redirect_from: /docs/ism/rollup-api/
|
||||
nav_order: 9
|
||||
---
|
||||
|
||||
# Index Rollups API
|
||||
|
||||
Use the index rollup operations to programmatically work with index rollup jobs.
|
||||
|
||||
---
|
||||
|
||||
#### Table of contents
|
||||
- TOC
|
||||
{:toc}
|
||||
|
||||
|
||||
---
|
||||
|
||||
## Create or update an index rollup job
|
||||
|
||||
Creates or updates an index rollup job.
|
||||
To update an existing job, you must provide its `seq_no` and `primary_term` values as the `if_seq_no` and `if_primary_term` query parameters.
|
||||
|
||||
#### Request
|
||||
|
||||
```json
|
||||
PUT _plugins/_rollup/jobs/<rollup_id> // Create
|
||||
PUT _plugins/_rollup/jobs/<rollup_id>?if_seq_no=1&if_primary_term=1 // Update
|
||||
{
|
||||
"rollup": {
|
||||
"source_index": "nyc-taxi-data",
|
||||
"target_index": "rollup-nyc-taxi-data",
|
||||
"schedule": {
|
||||
"interval": {
|
||||
"period": 1,
|
||||
"unit": "Days"
|
||||
}
|
||||
},
|
||||
"description": "Example rollup job",
|
||||
"enabled": true,
|
||||
"page_size": 200,
|
||||
"delay": 0,
|
||||
"roles": [
|
||||
"rollup_all",
|
||||
"nyc_taxi_all",
|
||||
"example_rollup_index_all"
|
||||
],
|
||||
"continuous": false,
|
||||
"dimensions": {
|
||||
"date_histogram": {
|
||||
"source_field": "tpep_pickup_datetime",
|
||||
"fixed_interval": "1h",
|
||||
"timezone": "America/Los_Angeles"
|
||||
},
|
||||
"terms": {
|
||||
"source_field": "PULocationID"
|
||||
},
|
||||
"metrics": [
|
||||
{
|
||||
"source_field": "passenger_count",
|
||||
"metrics": [
|
||||
{
|
||||
"avg": {}
|
||||
},
|
||||
{
|
||||
"sum": {}
|
||||
},
|
||||
{
|
||||
"max": {}
|
||||
},
|
||||
{
|
||||
"min": {}
|
||||
},
|
||||
{
|
||||
"value_count": {}
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
You can specify the following options.
|
||||
|
||||
Options | Description | Type | Required
|
||||
:--- | :--- |:--- |:--- |
|
||||
`source_index` | The name of the source index that you want to roll up. | `string` | Yes
|
||||
`target_index` | Specify the target index that the rolled up data is ingested into. You could either create a new target index or use an existing index. The target index cannot be a combination of raw and rolled up data. | `string` | Yes
|
||||
`schedule` | Schedule of the index rollup job which can be an interval or a cron expression. | `object` | Yes
|
||||
`schedule.interval` | Specify the frequency of execution of the rollup job. | `object` | No
|
||||
`schedule.interval.start_time` | Start time of the interval. | `timestamp` | Yes
|
||||
`schedule.interval.period` | Define the interval period. | `string` | Yes
|
||||
`schedule.interval.unit` | Specify the time unit of the interval. | `string` | Yes
|
||||
`schedule.interval.cron` | Optionally, specify a cron expression to define the rollup frequency. | `list` | No
|
||||
`schedule.interval.cron.expression` | Specify a Unix cron expression. | `string` | Yes
|
||||
`schedule.interval.cron.timezone` | Specify timezones as defined by the IANA Time Zone Database. Defaults to UTC. | `string` | No
|
||||
`description` | Optionally, describe the rollup job. | `string` | No
|
||||
`enabled` | When true, the index rollup job is scheduled. Default is true. | `boolean` | Yes
|
||||
`continuous` | Specify whether or not the index rollup job continuously rolls up data forever or just executes over the current data set once and stops. Default is false. | `boolean` | Yes
|
||||
`error_notification` | Set up a Mustache message template sent for error notifications. For example, if an index rollup job fails, the system sends a message to a Slack channel. | `object` | No
|
||||
`page_size` | Specify the number of buckets to paginate through at a time while rolling up. | `number` | Yes
|
||||
`delay` | Specify time value to delay execution of the index rollup job. | `time_unit` | No
|
||||
`dimensions` | Specify aggregations to create dimensions for the roll up time window. | `object` | Yes
|
||||
`dimensions.date_histogram` | Specify either fixed_interval or calendar_interval, but not both. Either one limits what you can query in the target index. | `object` | No
|
||||
`dimensions.date_histogram.fixed_interval` | Specify the fixed interval for aggregations in milliseconds, seconds, minutes, hours, or days. | `string` | No
|
||||
`dimensions.date_histogram.calendar_interval` | Specify the calendar interval for aggregations in minutes, hours, days, weeks, months, quarters, or years. | `string` | No
|
||||
`dimensions.date_histogram.field` | Specify the date field used in date histogram aggregation. | `string` | No
|
||||
`dimensions.date_histogram.timezone` | Specify the timezones as defined by the IANA Time Zone Database. The default is UTC. | `string` | No
|
||||
`dimensions.terms` | Specify the term aggregations that you want to roll up. | `object` | No
|
||||
`dimensions.terms.fields` | Specify terms aggregation for compatible fields. | `object` | No
|
||||
`dimensions.histogram` | Specify the histogram aggregations that you want to roll up. | `object` | No
|
||||
`dimensions.histogram.field` | Add a field for histogram aggregations. | `string` | Yes
|
||||
`dimensions.histogram.interval` | Specify the histogram aggregation interval for the field. | `long` | Yes
|
||||
`dimensions.metrics` | Specify a list of objects that represent the fields and metrics that you want to calculate. | `nested object` | No
|
||||
`dimensions.metrics.field` | Specify the field that you want to perform metric aggregations on. | `string` | No
|
||||
`dimensions.metrics.field.metrics` | Specify the metric aggregations you want to calculate for the field. | `multiple strings` | No
|
||||
|
||||
|
||||
#### Sample response
|
||||
|
||||
```json
|
||||
{
|
||||
"_id": "rollup_id",
|
||||
"_seqNo": 1,
|
||||
"_primaryTerm": 1,
|
||||
"rollup": { ... }
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
## Get an index rollup job
|
||||
|
||||
Returns all information about an index rollup job based on the `rollup_id`.
|
||||
|
||||
#### Request
|
||||
|
||||
```json
|
||||
GET _plugins/_rollup/jobs/<rollup_id>
|
||||
```
|
||||
|
||||
|
||||
#### Sample response
|
||||
|
||||
```json
|
||||
{
|
||||
"_id": "my_rollup",
|
||||
"_seqNo": 1,
|
||||
"_primaryTerm": 1,
|
||||
"rollup": { ... }
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
---
|
||||
|
||||
## Delete an index rollup job
|
||||
|
||||
Deletes an index rollup job based on the `rollup_id`.
|
||||
|
||||
#### Request
|
||||
|
||||
```json
|
||||
DELETE _plugins/_rollup/jobs/<rollup_id>
|
||||
```
|
||||
|
||||
#### Sample response
|
||||
|
||||
```json
|
||||
200 OK
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
|
||||
## Start or stop an index rollup job
|
||||
|
||||
Starts or stops an index rollup job.
|
||||
|
||||
#### Request
|
||||
|
||||
```json
|
||||
POST _plugins/_rollup/jobs/<rollup_id>/_start
|
||||
POST _plugins/_rollup/jobs/<rollup_id>/_stop
|
||||
```
|
||||
|
||||
|
||||
#### Sample response
|
||||
|
||||
```json
|
||||
200 OK
|
||||
```
|
||||
|
||||
|
||||
---
|
||||
|
||||
## Explain an index rollup job
|
||||
|
||||
Returns detailed metadata information about the index rollup job and its current progress.
|
||||
|
||||
#### Request
|
||||
|
||||
```json
|
||||
GET _plugins/_rollup/jobs/<rollup_id>/_explain
|
||||
```
|
||||
|
||||
|
||||
#### Sample response
|
||||
|
||||
```json
|
||||
{
|
||||
"example_rollup": {
|
||||
"rollup_id": "example_rollup",
|
||||
"last_updated_time": 1602014281,
|
||||
"continuous": {
|
||||
"next_window_start_time": 1602055591,
|
||||
"next_window_end_time": 1602075591
|
||||
},
|
||||
"status": "running",
|
||||
"failure_reason": null,
|
||||
"stats": {
|
||||
"pages_processed": 342,
|
||||
"documents_processed": 489359,
|
||||
"rollups_indexed": 3420,
|
||||
"index_time_in_ms": 30495,
|
||||
"search_time_in_ms": 584922
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
|
@ -0,0 +1,12 @@
|
|||
---
|
||||
layout: default
|
||||
title: Index management
|
||||
nav_order: 30
|
||||
has_children: true
|
||||
---
|
||||
|
||||
# Index Management
|
||||
OpenSearch Dashboards
|
||||
{: .label .label-yellow :}
|
||||
|
||||
The Index Management (IM) plugin lets you automate recurring index management activities and reduce storage costs.
|
|
@ -0,0 +1,494 @@
|
|||
---
|
||||
layout: default
|
||||
title: ISM API
|
||||
parent: Index State Management
|
||||
grand_parent: Index management
|
||||
redirect_from: /docs/ism/api/
|
||||
nav_order: 5
|
||||
---
|
||||
|
||||
# ISM API
|
||||
|
||||
Use the index state management operations to programmatically work with policies and managed indices.
|
||||
|
||||
---
|
||||
|
||||
#### Table of contents
|
||||
- TOC
|
||||
{:toc}
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
## Create policy
|
||||
|
||||
Creates a policy.
|
||||
|
||||
#### Request
|
||||
|
||||
```json
|
||||
PUT _plugins/_ism/policies/policy_1
|
||||
{
|
||||
"policy": {
|
||||
"description": "ingesting logs",
|
||||
"default_state": "ingest",
|
||||
"states": [
|
||||
{
|
||||
"name": "ingest",
|
||||
"actions": [
|
||||
{
|
||||
"rollover": {
|
||||
"min_doc_count": 5
|
||||
}
|
||||
}
|
||||
],
|
||||
"transitions": [
|
||||
{
|
||||
"state_name": "search"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "search",
|
||||
"actions": [],
|
||||
"transitions": [
|
||||
{
|
||||
"state_name": "delete",
|
||||
"conditions": {
|
||||
"min_index_age": "5m"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "delete",
|
||||
"actions": [
|
||||
{
|
||||
"delete": {}
|
||||
}
|
||||
],
|
||||
"transitions": []
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
#### Sample response
|
||||
|
||||
```json
|
||||
{
|
||||
"_id": "policy_1",
|
||||
"_version": 1,
|
||||
"_primary_term": 1,
|
||||
"_seq_no": 7,
|
||||
"policy": {
|
||||
"policy": {
|
||||
"policy_id": "policy_1",
|
||||
"description": "ingesting logs",
|
||||
"last_updated_time": 1577990761311,
|
||||
"schema_version": 1,
|
||||
"error_notification": null,
|
||||
"default_state": "ingest",
|
||||
"states": [
|
||||
{
|
||||
"name": "ingest",
|
||||
"actions": [
|
||||
{
|
||||
"rollover": {
|
||||
"min_doc_count": 5
|
||||
}
|
||||
}
|
||||
],
|
||||
"transitions": [
|
||||
{
|
||||
"state_name": "search"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "search",
|
||||
"actions": [],
|
||||
"transitions": [
|
||||
{
|
||||
"state_name": "delete",
|
||||
"conditions": {
|
||||
"min_index_age": "5m"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "delete",
|
||||
"actions": [
|
||||
{
|
||||
"delete": {}
|
||||
}
|
||||
],
|
||||
"transitions": []
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
---
|
||||
|
||||
## Add policy
|
||||
|
||||
Adds a policy to an index. This operation does not change the policy if the index already has one.
|
||||
|
||||
#### Request
|
||||
|
||||
```json
|
||||
POST _plugins/_ism/add/index_1
|
||||
{
|
||||
"policy_id": "policy_1"
|
||||
}
|
||||
```
|
||||
|
||||
#### Sample response
|
||||
|
||||
```json
|
||||
{
|
||||
"updated_indices": 1,
|
||||
"failures": false,
|
||||
"failed_indices": []
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
|
||||
## Update policy
|
||||
|
||||
Updates a policy. Use the `seq_no` and `primary_term` parameters to update an existing policy. If these numbers don't match the existing policy or the policy doesn't exist, ISM throws an error.
|
||||
|
||||
#### Request
|
||||
|
||||
```json
|
||||
PUT _plugins/_ism/policies/policy_1?if_seq_no=7&if_primary_term=1
|
||||
{
|
||||
"policy": {
|
||||
"description": "ingesting logs",
|
||||
"default_state": "ingest",
|
||||
"states": [
|
||||
{
|
||||
"name": "ingest",
|
||||
"actions": [
|
||||
{
|
||||
"rollover": {
|
||||
"min_doc_count": 5
|
||||
}
|
||||
}
|
||||
],
|
||||
"transitions": [
|
||||
{
|
||||
"state_name": "search"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "search",
|
||||
"actions": [],
|
||||
"transitions": [
|
||||
{
|
||||
"state_name": "delete",
|
||||
"conditions": {
|
||||
"min_index_age": "5m"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "delete",
|
||||
"actions": [
|
||||
{
|
||||
"delete": {}
|
||||
}
|
||||
],
|
||||
"transitions": []
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
#### Sample response
|
||||
|
||||
```json
|
||||
{
|
||||
"_id": "policy_1",
|
||||
"_version": 2,
|
||||
"_primary_term": 1,
|
||||
"_seq_no": 10,
|
||||
"policy": {
|
||||
"policy": {
|
||||
"policy_id": "policy_1",
|
||||
"description": "ingesting logs",
|
||||
"last_updated_time": 1577990934044,
|
||||
"schema_version": 1,
|
||||
"error_notification": null,
|
||||
"default_state": "ingest",
|
||||
"states": [
|
||||
{
|
||||
"name": "ingest",
|
||||
"actions": [
|
||||
{
|
||||
"rollover": {
|
||||
"min_doc_count": 5
|
||||
}
|
||||
}
|
||||
],
|
||||
"transitions": [
|
||||
{
|
||||
"state_name": "search"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "search",
|
||||
"actions": [],
|
||||
"transitions": [
|
||||
{
|
||||
"state_name": "delete",
|
||||
"conditions": {
|
||||
"min_index_age": "5m"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "delete",
|
||||
"actions": [
|
||||
{
|
||||
"delete": {}
|
||||
}
|
||||
],
|
||||
"transitions": []
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
---
|
||||
|
||||
## Get policy
|
||||
|
||||
Gets the policy by `policy_id`.
|
||||
|
||||
#### Request
|
||||
|
||||
```json
|
||||
GET _plugins/_ism/policies/policy_1
|
||||
```
|
||||
|
||||
|
||||
#### Sample response
|
||||
|
||||
```json
|
||||
{
|
||||
"_id": "policy_1",
|
||||
"_version": 2,
|
||||
"_seq_no": 10,
|
||||
"_primary_term": 1,
|
||||
"policy": {
|
||||
"policy_id": "policy_1",
|
||||
"description": "ingesting logs",
|
||||
"last_updated_time": 1577990934044,
|
||||
"schema_version": 1,
|
||||
"error_notification": null,
|
||||
"default_state": "ingest",
|
||||
"states": [
|
||||
{
|
||||
"name": "ingest",
|
||||
"actions": [
|
||||
{
|
||||
"rollover": {
|
||||
"min_doc_count": 5
|
||||
}
|
||||
}
|
||||
],
|
||||
"transitions": [
|
||||
{
|
||||
"state_name": "search"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "search",
|
||||
"actions": [],
|
||||
"transitions": [
|
||||
{
|
||||
"state_name": "delete",
|
||||
"conditions": {
|
||||
"min_index_age": "5m"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "delete",
|
||||
"actions": [
|
||||
{
|
||||
"delete": {}
|
||||
}
|
||||
],
|
||||
"transitions": []
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Remove policy from index
|
||||
|
||||
Removes any ISM policy from the index.
|
||||
|
||||
#### Request
|
||||
|
||||
```json
|
||||
POST _plugins/_ism/remove/index_1
|
||||
```
|
||||
|
||||
|
||||
#### Sample response
|
||||
|
||||
```json
|
||||
{
|
||||
"updated_indices": 1,
|
||||
"failures": false,
|
||||
"failed_indices": []
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Update managed index policy
|
||||
|
||||
Updates the managed index policy to a new policy (or to a new version of the policy). You can use an index pattern to update multiple indices at once. When updating multiple indices, you might want to include a state filter to only affect certain managed indices. The change policy filters out all the existing managed indices and only applies the change to the ones in the state that you specify. You can also explicitly specify the state that the managed index transitions to after the change policy takes effect.
|
||||
|
||||
A policy change is an asynchronous background process. The changes are queued and are not executed immediately by the background process. This delay in execution protects the currently running managed indices from being put into a broken state. If the policy you are changing to has only some small configuration changes, then the change takes place immediately. For example, if the policy changes the `min_index_age` parameter in a rollover condition from `1000d` to `100d`, this change takes place immediately in its next execution. If the change modifies the state, actions, or the order of actions of the current state the index is in, then the change happens at the end of its current state before transitioning to a new state.
|
||||
|
||||
In this example, the policy applied on the `index_1` index is changed to `policy_1`, which could either be a completely new policy or an updated version of its existing policy. The process only applies the change if the index is currently in the `searches` state. After this change in policy takes place, `index_1` transitions to the `delete` state.
|
||||
|
||||
#### Request
|
||||
|
||||
```json
|
||||
POST _plugins/_ism/change_policy/index_1
|
||||
{
|
||||
"policy_id": "policy_1",
|
||||
"state": "delete",
|
||||
"include": [
|
||||
{
|
||||
"state": "searches"
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
#### Sample response
|
||||
|
||||
```json
|
||||
{
|
||||
"updated_indices": 0,
|
||||
"failures": false,
|
||||
"failed_indices": []
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Retry failed index
|
||||
|
||||
Retries the failed action for an index. For the retry call to succeed, ISM must manage the index, and the index must be in a failed state. You can use index patterns (`*`) to retry multiple failed indices.
|
||||
|
||||
#### Request
|
||||
|
||||
```json
|
||||
POST _plugins/_ism/retry/index_1
|
||||
{
|
||||
"state": "delete"
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
#### Sample response
|
||||
|
||||
```json
|
||||
{
|
||||
"updated_indices": 0,
|
||||
"failures": false,
|
||||
"failed_indices": []
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Explain index
|
||||
|
||||
Gets the current state of the index. You can use index patterns to get the status of multiple indices.
|
||||
|
||||
#### Request
|
||||
|
||||
```json
|
||||
GET _plugins/_ism/explain/index_1
|
||||
```
|
||||
|
||||
|
||||
#### Sample response
|
||||
|
||||
```json
|
||||
{
|
||||
"index_1": {
|
||||
"index.opendistro.index_state_management.policy_id": "policy_1"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
The `opendistro.index_state_management.policy_id` setting is deprecated starting from version 1.13.0.
|
||||
We retain this field in the response API for consistency.
|
||||
|
||||
---
|
||||
|
||||
## Delete policy
|
||||
|
||||
Deletes the policy by `policy_id`.
|
||||
|
||||
#### Request
|
||||
|
||||
```json
|
||||
DELETE _plugins/_ism/policies/policy_1
|
||||
```
|
||||
|
||||
|
||||
#### Sample response
|
||||
|
||||
```json
|
||||
{
|
||||
"_index": ".opendistro-ism-config",
|
||||
"_type": "_doc",
|
||||
"_id": "policy_1",
|
||||
"_version": 3,
|
||||
"result": "deleted",
|
||||
"forced_refresh": true,
|
||||
"_shards": {
|
||||
"total": 2,
|
||||
"successful": 2,
|
||||
"failed": 0
|
||||
},
|
||||
"_seq_no": 15,
|
||||
"_primary_term": 1
|
||||
}
|
||||
```
|
|
@ -0,0 +1,103 @@
|
|||
---
|
||||
layout: default
|
||||
title: Index State Management
|
||||
nav_order: 3
|
||||
parent: Index management
|
||||
has_children: true
|
||||
redirect_from: /docs/ism/
|
||||
has_toc: false
|
||||
---
|
||||
|
||||
# Index State Management
|
||||
OpenSearch Dashboards
|
||||
{: .label .label-yellow :}
|
||||
|
||||
If you analyze time-series data, you likely prioritize new data over old data. You might periodically perform certain operations on older indices, such as reducing replica count or deleting them.
|
||||
|
||||
Index State Management (ISM) is a plugin that lets you automate these periodic, administrative operations by triggering them based on changes in the index age, index size, or number of documents. Using the ISM plugin, you can define *policies* that automatically handle index rollovers or deletions to fit your use case.
|
||||
|
||||
For example, you can define a policy that moves your index into a `read_only` state after 30 days and then deletes it after a set period of 90 days. You can also set up the policy to send you a notification message when the index is deleted.
|
||||
|
||||
You might want to perform an index rollover after a certain amount of time or run a `force_merge` operation on an index during off-peak hours to improve search performance during peak hours.
|
||||
|
||||
To use the ISM plugin, your user role needs to be mapped to the `all_access` role that gives you full access to the cluster. To learn more, see [Users and roles](../security/access-control/users-roles/).
|
||||
{: .note }
|
||||
|
||||
## Get started with ISM
|
||||
|
||||
To get started, choose **Index Management** in OpenSearch Dashboards.
|
||||
|
||||
### Step 1: Set up policies
|
||||
|
||||
A policy is a set of rules that describes how an index should be managed. For information about creating a policy, see [Policies](policies/).
|
||||
|
||||
1. Choose the **Index Policies** tab.
|
||||
2. Choose **Create policy**.
|
||||
3. In the **Name policy** section, enter a policy ID.
|
||||
4. In the **Define policy** section, enter your policy.
|
||||
5. Choose **Create**.
|
||||
|
||||
After you create a policy, your next step is to attach this policy to an index or indices.
|
||||
You can set up an `ism_template` in the policy so when you create an index that matches the ISM template pattern, the index will have this policy attached to it:
|
||||
|
||||
```json
|
||||
PUT _plugins/_ism/policies/policy_id
|
||||
{
|
||||
"policy": {
|
||||
"description": "Example policy.",
|
||||
"default_state": "...",
|
||||
"states": [...],
|
||||
"ism_template": {
|
||||
"index_patterns": ["index_name-*"],
|
||||
"priority": 100
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
For an example ISM template policy, see [Sample policy with ISM template](policies/#sample-policy-with-ism-template).
|
||||
|
||||
Older versions of the plugin include the `policy_id` in an index template, so when an index is created that matches the index template pattern, the index will have the policy attached to it:
|
||||
|
||||
```json
|
||||
PUT _index_template/<template_name>
|
||||
{
|
||||
"index_patterns": [
|
||||
"index_name-*"
|
||||
],
|
||||
"template": {
|
||||
"settings": {
|
||||
"opendistro.index_state_management.policy_id": "policy_id"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
The `opendistro.index_state_management.policy_id` setting is deprecated. You can continue to automatically manage newly created indices with the ISM template field.
|
||||
{: .note }
|
||||
|
||||
### Step 2: Attach policies to indices
|
||||
|
||||
1. Choose **Indices**.
|
||||
2. Choose the index or indices that you want to attach your policy to.
|
||||
3. Choose **Apply policy**.
|
||||
4. From the **Policy ID** menu, choose the policy that you created.
|
||||
You can see a preview of your policy.
|
||||
5. If your policy includes a rollover operation, specify a rollover alias.
|
||||
Make sure that the alias that you enter already exists. For more information about the rollover operation, see [rollover](policies/#rollover).
|
||||
6. Choose **Apply**.
|
||||
|
||||
After you attach a policy to an index, ISM creates a job that runs every 5 minutes by default to perform policy actions, check conditions, and transition the index into different states. To change the default time interval for this job, see [Settings](settings/).
|
||||
|
||||
If you want to use an OpenSearch operation to create an index with a policy already attached to it, see [create index](api/#create-index).
|
||||
|
||||
### Step 3: Manage indices
|
||||
|
||||
1. Choose **Managed Indices**.
|
||||
2. To change your policy, see [Change Policy](managedindices/#change-policy).
|
||||
3. To attach a rollover alias to your index, select your policy and choose **Add rollover alias**.
|
||||
Make sure that the alias that you enter already exists. For more information about the rollover operation, see [rollover](policies/#rollover).
|
||||
4. To remove a policy, choose your policy, and then choose **Remove policy**.
|
||||
5. To retry a policy, choose your policy, and then choose **Retry policy**.
|
||||
|
||||
For information about managing your policies, see [Managed Indices](managedindices/).
|
|
@ -0,0 +1,75 @@
|
|||
---
|
||||
layout: default
|
||||
title: Managed Indices
|
||||
nav_order: 3
|
||||
parent: Index State Management
|
||||
grand_parent: Index management
|
||||
redirect_from: /docs/ism/managedindices/
|
||||
has_children: false
|
||||
---
|
||||
|
||||
# Managed indices
|
||||
|
||||
You can change or update a policy using the managed index operations.
|
||||
|
||||
This table lists the fields of managed index operations.
|
||||
|
||||
Parameter | Description | Type | Required | Read Only
|
||||
:--- | :--- |:--- |:--- |:---
|
||||
`name` | The name of the managed index policy. | `string` | Yes | No
|
||||
`index` | The name of the managed index that this policy is managing. | `string` | Yes | No
|
||||
`index_uuid` | The uuid of the index. | `string` | Yes | No
|
||||
`enabled` | When `true`, the managed index is scheduled and run by the scheduler. | `boolean` | Yes | No
|
||||
`enabled_time` | The time the managed index was last enabled. If the managed index process is disabled, then this is null. | `timestamp` | Yes | Yes
|
||||
`last_updated_time` | The time the managed index was last updated. | `timestamp` | Yes | Yes
|
||||
`schedule` | The schedule of the managed index job. | `object` | Yes | No
|
||||
`policy_id` | The name of the policy used by this managed index. | `string` | Yes | No
|
||||
`policy_seq_no` | The sequence number of the policy used by this managed index. | `number` | Yes | No
|
||||
`policy_primary_term` | The primary term of the policy used by this managed index. | `number` | Yes | No
|
||||
`policy_version` | The version of the policy used by this managed index. | `number` | Yes | Yes
|
||||
`policy` | The cached JSON of the policy for the `policy_version` that's used during runs. If the policy is null, it means that this is the first execution of the job and the latest policy document is read in/saved. | `object` | No | No
|
||||
`change_policy` | The information regarding what policy and state to change to. | `object` | No | No
|
||||
`policy_name` | The name of the policy to update to. To update to the latest version, set this to be the same as the current `policy_name`. | `string` | No | Yes
|
||||
`state` | The state of the managed index after it finishes updating. If no state is specified, it's assumed that the policy structure did not change. | `string` | No | Yes
|
||||
|
||||
The following example shows a managed index policy:
|
||||
|
||||
```json
|
||||
{
|
||||
"managed_index": {
|
||||
"name": "my_index",
|
||||
"index": "my_index",
|
||||
"index_uuid": "sOKSOfkdsoSKeofjIS",
|
||||
"enabled": true,
|
||||
"enabled_time": 1553112384,
|
||||
"last_updated_time": 1553112384,
|
||||
"schedule": {
|
||||
"interval": {
|
||||
"period": 1,
|
||||
"unit": "MINUTES",
|
||||
"start_time": 1553112384
|
||||
}
|
||||
},
|
||||
"policy_id": "log_rotation",
|
||||
"policy_version": 1,
|
||||
"policy": {...},
|
||||
"change_policy": null
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Change policy
|
||||
|
||||
You can change any managed index policy, but ISM has a few constraints in place to make sure that policy changes don't break indices.
|
||||
|
||||
If an index is stuck in its current state, never proceeding, and you want to update its policy immediately, make sure that the new policy includes the same state---same name, same actions, same order---as the old policy. In this case, even if the policy is in the middle of executing an action, ISM applies the new policy.
|
||||
|
||||
If you update the policy without including an identical state, ISM updates the policy only after all actions in the current state finish executing. Alternately, you can choose a specific state in your old policy after which you want the new policy to take effect.
|
||||
|
||||
To change a policy using OpenSearch Dashboards, do the following:
|
||||
|
||||
- Under **Managed indices**, choose the indices that you want to attach the new policy to.
|
||||
- To attach the new policy to indices in specific states, choose **Choose state filters**, and then choose those states.
|
||||
- Under **Choose New Policy**, choose the new policy.
|
||||
- To start the new policy for indices in the current state, choose **Keep indices in their current state after the policy takes effect**.
|
||||
- To start the new policy in a specific state, choose **Start from a chosen state after changing policies**, and then choose the default start state in your new policy.
|
|
@ -0,0 +1,666 @@
|
|||
---
|
||||
layout: default
|
||||
title: Policies
|
||||
nav_order: 1
|
||||
parent: Index State Management
|
||||
grand_parent: Index management
|
||||
redirect_from: /docs/ism/policies/
|
||||
has_children: false
|
||||
---
|
||||
|
||||
# Policies
|
||||
|
||||
Policies are JSON documents that define the following:
|
||||
|
||||
- The *states* that an index can be in, including the default state for new indices. For example, you might name your states "hot," "warm," "delete," and so on. For more information, see [States](#states).
|
||||
- Any *actions* that you want the plugin to take when an index enters a state, such as performing a rollover. For more information, see [Actions](#actions).
|
||||
- The conditions that must be met for an index to move into a new state, known as *transitions*. For example, if an index is more than eight weeks old, you might want to move it to the "delete" state. For more information, see [Transitions](#transitions).
|
||||
|
||||
In other words, a policy defines the *states* that an index can be in, the *actions* to perform when in a state, and the conditions that must be met to *transition* between states.
|
||||
|
||||
You have complete flexibility in the way you can design your policies. You can create any state, transition to any other state, and specify any number of actions in each state.
|
||||
|
||||
This table lists the relevant fields of a policy.
|
||||
|
||||
Field | Description | Type | Required | Read Only
|
||||
:--- | :--- |:--- |:--- |:---
|
||||
`policy_id` | The name of the policy. | `string` | Yes | Yes
|
||||
`description` | A human-readable description of the policy. | `string` | Yes | No
|
||||
`ism_template` | Specify an ISM template pattern that matches the index to apply the policy. | `nested list of objects` | No | No
|
||||
`last_updated_time` | The time the policy was last updated. | `timestamp` | Yes | Yes
|
||||
`error_notification` | The destination and message template for error notifications. The destination could be Amazon Chime, Slack, or a webhook URL. | `object` | No | No
|
||||
`default_state` | The default starting state for each index that uses this policy. | `string` | Yes | No
|
||||
`states` | The states that you define in the policy. | `nested list of objects` | Yes | No
|
||||
|
||||
---
|
||||
|
||||
#### Table of contents
|
||||
1. TOC
|
||||
{:toc}
|
||||
|
||||
|
||||
---
|
||||
|
||||
## States
|
||||
|
||||
A state is the description of the status that the managed index is currently in. A managed index can be in only one state at a time. Each state has associated actions that are executed sequentially on entering a state and transitions that are checked after all the actions have been completed.
|
||||
|
||||
This table lists the parameters that you can define for a state.
|
||||
|
||||
Field | Description | Type | Required
|
||||
:--- | :--- |:--- |:--- |
|
||||
`name` | The name of the state. | `string` | Yes
|
||||
`actions` | The actions to execute after entering a state. For more information, see [Actions](#actions). | `nested list of objects` | Yes
|
||||
`transitions` | The next states and the conditions required to transition to those states. If no transitions exist, the policy assumes that it's complete and can now stop managing the index. For more information, see [Transitions](#transitions). | `nested list of objects` | Yes
|
||||
|
||||
---
|
||||
|
||||
## Actions
|
||||
|
||||
Actions are the steps that the policy sequentially executes on entering a specific state.
|
||||
|
||||
They are executed in the order in which they are defined.
|
||||
|
||||
This table lists the parameters that you can define for an action.
|
||||
|
||||
Parameter | Description | Type | Required | Default
|
||||
:--- | :--- |:--- |:--- |:---
|
||||
`timeout` | The timeout period for the action. Accepts time units for minutes, hours, and days. | `time unit` | No | -
|
||||
`retry` | The retry configuration for the action. | `object` | No | Specific to action
|
||||
|
||||
The `retry` operation has the following parameters:
|
||||
|
||||
Parameter | Description | Type | Required | Default
|
||||
:--- | :--- |:--- |:--- |:---
|
||||
`count` | The number of retry counts. | `number` | Yes | -
|
||||
`backoff` | The backoff policy type to use when retrying. | `string` | No | Exponential
|
||||
`delay` | The time to wait between retries. Accepts time units for minutes, hours, and days. | `time unit` | No | 1 minute
|
||||
|
||||
The following example action has a timeout period of one hour. The policy retries this action three times with an exponential backoff policy, with a delay of 10 minutes between each retry:
|
||||
|
||||
```json
|
||||
"actions": {
|
||||
"timeout": "1h",
|
||||
"retry": {
|
||||
"count": 3,
|
||||
"backoff": "exponential",
|
||||
"delay": "10m"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
For a list of available unit types, see [Supported units](../../../opensearch/units/).
|
||||
|
||||
## ISM supported operations
|
||||
|
||||
ISM supports the following operations:
|
||||
|
||||
- [force_merge](#force_merge)
|
||||
- [read_only](#read_only)
|
||||
- [read_write](#read_write)
|
||||
- [replica_count](#replica_count)
|
||||
- [close](#close)
|
||||
- [open](#open)
|
||||
- [delete](#delete)
|
||||
- [rollover](#rollover)
|
||||
- [notification](#notification)
|
||||
- [snapshot](#snapshot)
|
||||
- [index_priority](#index_priority)
|
||||
- [allocation](#allocation)
|
||||
|
||||
### force_merge
|
||||
|
||||
Reduces the number of Lucene segments by merging the segments of individual shards. This operation attempts to set the index to a `read-only` state before starting the merging process.
|
||||
|
||||
Parameter | Description | Type | Required
|
||||
:--- | :--- |:--- |:--- |
|
||||
`max_num_segments` | The number of segments to reduce the shard to. | `number` | Yes
|
||||
|
||||
```json
|
||||
{
|
||||
"force_merge": {
|
||||
"max_num_segments": 1
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### read_only
|
||||
|
||||
Sets a managed index to be read only.
|
||||
|
||||
```json
|
||||
{
|
||||
"read_only": {}
|
||||
}
|
||||
```
|
||||
|
||||
### read_write
|
||||
|
||||
Sets a managed index to be writeable.
|
||||
|
||||
```json
|
||||
{
|
||||
"read_write": {}
|
||||
}
|
||||
```
|
||||
|
||||
### replica_count
|
||||
|
||||
Sets the number of replicas to assign to an index.
|
||||
|
||||
Parameter | Description | Type | Required
|
||||
:--- | :--- |:--- |:--- |
|
||||
`number_of_replicas` | Defines the number of replicas to assign to an index. | `number` | Yes
|
||||
|
||||
```json
|
||||
{
|
||||
"replica_count": {
|
||||
"number_of_replicas": 2
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
For information about setting replicas, see [Primary and replica shards](../../../opensearch/#primary-and-replica-shards).
|
||||
|
||||
### close
|
||||
|
||||
Closes the managed index.
|
||||
|
||||
```json
|
||||
{
|
||||
"close": {}
|
||||
}
|
||||
```
|
||||
|
||||
Closed indices remain on disk, but consume no CPU or memory. You can't read from, write to, or search closed indices.
|
||||
|
||||
Closing an index is a good option if you need to retain data for longer than you need to actively search it and have sufficient disk space on your data nodes. If you need to search the data again, reopening a closed index is simpler than restoring an index from a snapshot.
|
||||
|
||||
### open
|
||||
|
||||
Opens a managed index.
|
||||
|
||||
```json
|
||||
{
|
||||
"open": {}
|
||||
}
|
||||
```
|
||||
|
||||
### delete
|
||||
|
||||
Deletes a managed index.
|
||||
|
||||
```json
|
||||
{
|
||||
"delete": {}
|
||||
}
|
||||
```
|
||||
|
||||
### rollover
|
||||
|
||||
Rolls an alias over to a new index when the managed index meets one of the rollover conditions.
|
||||
|
||||
The index format must match the pattern: `^.*-\d+$`. For example, `(logs-000001)`.
|
||||
Set `index.plugins.index_state_management.rollover_alias` as the alias to roll over.
|
||||
|
||||
Parameter | Description | Type | Example | Required
|
||||
:--- | :--- |:--- |:--- |:---
|
||||
`min_size` | The minimum size of the total primary shard storage (not counting replicas) required to roll over the index. For example, if you set `min_size` to 100 GiB and your index has 5 primary shards and 5 replica shards of 20 GiB each, the total size of the primaries is 100 GiB, so the rollover occurs. ISM doesn't check indices continually, so it doesn't roll over indices at exactly 100 GiB. Instead, if an index is continuously growing, ISM might check it at 99 GiB, not perform the rollover, check again when the shards reach 105 GiB, and then perform the operation. | `string` | `20gb` or `5mb` | No
|
||||
`min_doc_count` | The minimum number of documents required to roll over the index. | `number` | `2000000` | No
|
||||
`min_index_age` | The minimum age required to roll over the index. Index age is the time between its creation and the present. | `string` | `5d` or `7h` | No
|
||||
|
||||
```json
|
||||
{
|
||||
"rollover": {
|
||||
"min_size": "50gb"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
```json
|
||||
{
|
||||
"rollover": {
|
||||
"min_doc_count": 100000000
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
```json
|
||||
{
|
||||
"rollover": {
|
||||
"min_index_age": "30d"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### notification
|
||||
|
||||
Sends you a notification.
|
||||
|
||||
Parameter | Description | Type | Required
|
||||
:--- | :--- |:--- |:--- |
|
||||
`destination` | The destination URL. | `Slack, Amazon Chime, or webhook URL` | Yes
|
||||
`message_template` | The text of the message. You can add variables to your messages using [Mustache templates](https://mustache.github.io/mustache.5.html). | `object` | Yes
|
||||
|
||||
The destination system **must** return a response; otherwise, the notification operation throws an error.
|
||||
|
||||
#### Example 1: Chime notification
|
||||
|
||||
```json
|
||||
{
|
||||
"notification": {
|
||||
"destination": {
|
||||
"chime": {
|
||||
"url": "<url>"
|
||||
}
|
||||
},
|
||||
"message_template": {
|
||||
"source": "the index is {% raw %}{{ctx.index}}{% endraw %}"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
#### Example 2: Custom webhook notification
|
||||
|
||||
```json
|
||||
{
|
||||
"notification": {
|
||||
"destination": {
|
||||
"custom_webhook": {
|
||||
"url": "https://<your_webhook>"
|
||||
}
|
||||
},
|
||||
"message_template": {
|
||||
"source": "the index is {% raw %}{{ctx.index}}{% endraw %}"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
#### Example 3: Slack notification
|
||||
|
||||
```json
|
||||
{
|
||||
"notification": {
|
||||
"destination": {
|
||||
"slack": {
|
||||
"url": "https://hooks.slack.com/services/xxx/xxxxxx"
|
||||
}
|
||||
},
|
||||
"message_template": {
|
||||
"source": "the index is {% raw %}{{ctx.index}}{% endraw %}"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
You can use `ctx` variables in your message to represent a number of policy parameters based on the past executions of your policy. For example, if your policy has a rollover action, you can use `{% raw %}{{ctx.action.name}}{% endraw %}` in your message to represent the name of the rollover.
|
||||
|
||||
The following `ctx` variable options are available for every policy:
|
||||
|
||||
#### Guaranteed variables
|
||||
|
||||
Parameter | Description | Type
|
||||
:--- | :--- |:---
|
||||
`index` | The name of the index. | `string`
|
||||
`index_uuid` | The uuid of the index. | `string`
|
||||
`policy_id` | The name of the policy. | `string`
|
||||
|
||||
### snapshot
|
||||
|
||||
Back up your cluster’s indices and state. For more information about snapshots, see [Take and restore snapshots](../../../opensearch/snapshot-restore/).
|
||||
|
||||
The `snapshot` operation has the following parameters:
|
||||
|
||||
Parameter | Description | Type | Required | Default
|
||||
:--- | :--- |:--- |:--- |:---
|
||||
`repository` | The repository name that you register through the native snapshot API operations. | `string` | Yes | -
|
||||
`snapshot` | The name of the snapshot. | `string` | Yes | -
|
||||
|
||||
```json
|
||||
{
|
||||
"snapshot": {
|
||||
"repository": "my_backup",
|
||||
"snapshot": "my_snapshot"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### index_priority
|
||||
|
||||
Set the priority for the index in a specific state. Unallocated shards of indices are recovered in the order of their priority, whenever possible. The indices with higher priority values are recovered first followed by the indices with lower priority values.
|
||||
|
||||
The `index_priority` operation has the following parameter:
|
||||
|
||||
Parameter | Description | Type | Required | Default
|
||||
:--- | :--- |:--- |:--- |:---
|
||||
`priority` | The priority for the index as soon as it enters a state. | `number` | Yes | 1
|
||||
|
||||
```json
|
||||
"actions": [
|
||||
{
|
||||
"index_priority": {
|
||||
"priority": 50
|
||||
}
|
||||
}
|
||||
]
|
||||
```
|
||||
|
||||
### allocation
|
||||
|
||||
Allocate the index to a node with a specific attribute.
|
||||
For example, setting `require` to `warm` moves your data only to "warm" nodes.
|
||||
|
||||
The `allocation` operation has the following parameters:
|
||||
|
||||
Parameter | Description | Type | Required
|
||||
:--- | :--- |:--- |:---
|
||||
`require` | Allocate the index to a node with a specified attribute. | `string` | Yes
|
||||
`include` | Allocate the index to a node with any of the specified attributes. | `string` | Yes
|
||||
`exclude` | Don’t allocate the index to a node with any of the specified attributes. | `string` | Yes
|
||||
`wait_for` | Wait for the policy to execute before allocating the index to a node with a specified attribute. | `string` | Yes
|
||||
|
||||
```json
|
||||
"actions": [
|
||||
{
|
||||
"allocation": {
|
||||
"require": { "box_type": "warm" }
|
||||
}
|
||||
}
|
||||
]
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Transitions
|
||||
|
||||
Transitions define the conditions that need to be met for a state to change. After all actions in the current state are completed, the policy starts checking the conditions for transitions.
|
||||
|
||||
Transitions are evaluated in the order in which they are defined. For example, if the conditions for the first transition are met, then this transition takes place and the rest of the transitions are dismissed.
|
||||
|
||||
If you don't specify any conditions in a transition and leave it empty, then it's assumed to be the equivalent of always true. This means that the policy transitions the index to this state the moment it checks.
|
||||
|
||||
This table lists the parameters you can define for transitions.
|
||||
|
||||
Parameter | Description | Type | Required
|
||||
:--- | :--- |:--- |:--- |
|
||||
`state_name` | The name of the state to transition to if the conditions are met. | `string` | Yes
|
||||
`conditions` | List the conditions for the transition. | `list` | Yes
|
||||
|
||||
The `conditions` object has the following parameters:
|
||||
|
||||
Parameter | Description | Type | Required
|
||||
:--- | :--- |:--- |:--- |
|
||||
`min_index_age` | The minimum age of the index required to transition. | `string` | No
|
||||
`min_doc_count` | The minimum document count of the index required to transition. | `number` | No
|
||||
`min_size` | The minimum size of the index required to transition. | `string` | No
|
||||
`cron` | The `cron` job that triggers the transition if no other transition happens first. | `object` | No
|
||||
`cron.cron.expression` | The `cron` expression that triggers the transition. | `string` | Yes
|
||||
`cron.cron.timezone` | The timezone that triggers the transition. | `string` | Yes
|
||||
|
||||
The following example transitions the index to a `cold` state after a period of 30 days:
|
||||
|
||||
```json
|
||||
"transitions": [
|
||||
{
|
||||
"state_name": "cold",
|
||||
"conditions": {
|
||||
"min_index_age": "30d"
|
||||
}
|
||||
}
|
||||
]
|
||||
```
|
||||
|
||||
ISM checks the conditions on every execution of the policy based on the set interval.
|
||||
|
||||
This example uses the `cron` condition to transition indices every Saturday at 5:00 PM PT:
|
||||
|
||||
```json
|
||||
"transitions": [
|
||||
{
|
||||
"state_name": "cold",
|
||||
"conditions": {
|
||||
"cron": {
|
||||
"cron": {
|
||||
"expression": "* 17 * * SAT",
|
||||
"timezone": "America/Los_Angeles"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
```
|
||||
|
||||
Note that this condition does not execute at exactly 5:00 PM; the job still executes based on the `job_interval` setting. Due to this variance in start time and the amount of time that it can take for actions to complete prior to checking transition conditions, we recommend against overly narrow cron expressions. For example, don't use `15 17 * * SAT` (5:15 PM on Saturday).
|
||||
|
||||
A window of an hour, which this example uses, is generally sufficient, but you might increase it to 2--3 hours to avoid missing the window and having to wait a week for the transition to occur. Alternately, you could use a broader expression such as `* * * * SAT,SUN` to have the transition occur at any time during the weekend.
|
||||
|
||||
For information on writing cron expressions, see [Cron expression reference](../../../alerting/cron/).
|
||||
|
||||
---
|
||||
|
||||
## Error notifications
|
||||
|
||||
The `error_notification` operation sends you a notification if your managed index fails.
|
||||
It notifies a single destination with a custom message.
|
||||
|
||||
Set up error notifications at the policy level:
|
||||
|
||||
```json
|
||||
{
|
||||
"policy": {
|
||||
"description": "hot warm delete workflow",
|
||||
"default_state": "hot",
|
||||
"schema_version": 1,
|
||||
"error_notification": { },
|
||||
"states": [ ]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Parameter | Description | Type | Required
|
||||
:--- | :--- |:--- |:--- |
|
||||
`destination` | The destination URL. | `Slack, Amazon Chime, or webhook URL` | Yes
|
||||
`message_template` | The text of the message. You can add variables to your messages using [Mustache templates](https://mustache.github.io/mustache.5.html). | `object` | Yes
|
||||
|
||||
The destination system **must** return a response; otherwise, the `error_notification` operation throws an error.
|
||||
|
||||
#### Example 1: Chime notification
|
||||
|
||||
```json
|
||||
{
|
||||
"error_notification": {
|
||||
"destination": {
|
||||
"chime": {
|
||||
"url": "<url>"
|
||||
}
|
||||
},
|
||||
"message_template": {
|
||||
"source": "The index {% raw %}{{ctx.index}}{% endraw %} failed during policy execution."
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
#### Example 2: Custom webhook notification
|
||||
|
||||
```json
|
||||
{
|
||||
"error_notification": {
|
||||
"destination": {
|
||||
"custom_webhook": {
|
||||
"url": "https://<your_webhook>"
|
||||
}
|
||||
},
|
||||
"message_template": {
|
||||
"source": "The index {% raw %}{{ctx.index}}{% endraw %} failed during policy execution."
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
#### Example 3: Slack notification
|
||||
|
||||
```json
|
||||
{
|
||||
"error_notification": {
|
||||
"destination": {
|
||||
"slack": {
|
||||
"url": "https://hooks.slack.com/services/xxx/xxxxxx"
|
||||
}
|
||||
},
|
||||
"message_template": {
|
||||
"source": "The index {% raw %}{{ctx.index}}{% endraw %} failed during policy execution."
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
You can use the same options for `ctx` variables as the [notification](#notification) operation.
|
||||
|
||||
## Sample policy with ISM template
|
||||
|
||||
The following sample template policy is for a rollover use case.
|
||||
|
||||
1. Create a policy with an `ism_template` field:
|
||||
|
||||
```json
|
||||
PUT _plugins/_ism/policies/rollover_policy
|
||||
{
|
||||
"policy": {
|
||||
"description": "Example rollover policy.",
|
||||
"default_state": "rollover",
|
||||
"states": [
|
||||
{
|
||||
"name": "rollover",
|
||||
"actions": [
|
||||
{
|
||||
"rollover": {
|
||||
"min_doc_count": 1
|
||||
}
|
||||
}
|
||||
],
|
||||
"transitions": []
|
||||
}
|
||||
],
|
||||
"ism_template": {
|
||||
"index_patterns": ["log*"],
|
||||
"priority": 100
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
You need to specify the `index_patterns` field. If you don't specify a value for `priority`, it defaults to 0.
|
||||
|
||||
2. Set up a template with the `rollover_alias` as `log`:
|
||||
|
||||
```json
|
||||
PUT _index_template/ism_rollover
|
||||
{
|
||||
"index_patterns": ["log*"],
|
||||
"template": {
|
||||
"settings": {
|
||||
"plugins.index_state_management.rollover_alias": "log"
|
||||
}
|
||||
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
3. Create an index with the `log` alias:
|
||||
|
||||
```json
|
||||
PUT log-000001
|
||||
{
|
||||
"aliases": {
|
||||
"log": {
|
||||
"is_write_index": true
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
4. Index a document to trigger the rollover condition:
|
||||
|
||||
```json
|
||||
POST log/_doc
|
||||
{
|
||||
"message": "dummy"
|
||||
}
|
||||
```
|
||||
|
||||
## Example policy
|
||||
|
||||
The following example policy implements a `hot`, `warm`, and `delete` workflow. You can use this policy as a template to prioritize resources to your indices based on their levels of activity.
|
||||
|
||||
In this case, an index is initially in a `hot` state. After a day, it changes to a `warm` state, where the number of replicas increases to 5 to improve the read performance.
|
||||
|
||||
After 30 days, the policy moves this index into a `delete` state. The service sends a notification to a Chime room that the index is being deleted, and then permanently deletes it.
|
||||
|
||||
```json
|
||||
{
|
||||
"policy": {
|
||||
"description": "hot warm delete workflow",
|
||||
"default_state": "hot",
|
||||
"schema_version": 1,
|
||||
"states": [
|
||||
{
|
||||
"name": "hot",
|
||||
"actions": [
|
||||
{
|
||||
"rollover": {
|
||||
"min_index_age": "1d"
|
||||
}
|
||||
}
|
||||
],
|
||||
"transitions": [
|
||||
{
|
||||
"state_name": "warm"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "warm",
|
||||
"actions": [
|
||||
{
|
||||
"replica_count": {
|
||||
"number_of_replicas": 5
|
||||
}
|
||||
}
|
||||
],
|
||||
"transitions": [
|
||||
{
|
||||
"state_name": "delete",
|
||||
"conditions": {
|
||||
"min_index_age": "30d"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "delete",
|
||||
"actions": [
|
||||
{
|
||||
"notification": {
|
||||
"destination": {
|
||||
"chime": {
|
||||
"url": "<URL>"
|
||||
}
|
||||
},
|
||||
"message_template": {
|
||||
"source": "The index {% raw %}{{ctx.index}}{% endraw %} is being deleted"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"delete": {}
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
This diagram shows the `states`, `transitions`, and `actions` of the above policy as a finite-state machine. For more information about finite-state machines, see [Wikipedia](https://en.wikipedia.org/wiki/Finite-state_machine).
|
||||
|
||||
![Policy State Machine](../../images/ism.png)
|
|
@ -0,0 +1,50 @@
|
|||
---
|
||||
layout: default
|
||||
title: Settings
|
||||
parent: Index State Management
|
||||
grand_parent: Index management
|
||||
redirect_from: /docs/ism/settings/
|
||||
nav_order: 4
|
||||
---
|
||||
|
||||
# ISM Settings
|
||||
|
||||
We don't recommend changing these settings; the defaults should work well for most use cases.
|
||||
|
||||
Index State Management (ISM) stores its configuration in the `.opendistro-ism-config` index. Don't modify this index without using the [ISM API operations](../api/).
|
||||
|
||||
All settings are available using the OpenSearch `_cluster/settings` operation. None require a restart, and all can be marked `persistent` or `transient`.
|
||||
|
||||
Setting | Default | Description
|
||||
:--- | :--- | :---
|
||||
`plugins.index_state_management.enabled` | True | Specifies whether ISM is enabled or not.
|
||||
`plugins.index_state_management.job_interval` | 5 minutes | The interval at which the managed index jobs are run.
|
||||
`plugins.index_state_management.coordinator.sweep_period` | 10 minutes | How often the routine background sweep is run.
|
||||
`plugins.index_state_management.coordinator.backoff_millis` | 50 milliseconds | The backoff time between retries for failures in the `ManagedIndexCoordinator` (such as when we update managed indices).
|
||||
`plugins.index_state_management.coordinator.backoff_count` | 2 | The count of retries for failures in the `ManagedIndexCoordinator`.
|
||||
`plugins.index_state_management.history.enabled` | True | Specifies whether audit history is enabled or not. The logs from ISM are automatically indexed to a logs document.
|
||||
`plugins.index_state_management.history.max_docs` | 2,500,000 | The maximum number of documents before rolling over the audit history index.
|
||||
`plugins.index_state_management.history.max_age` | 24 hours | The maximum age before rolling over the audit history index.
|
||||
`plugins.index_state_management.history.rollover_check_period` | 8 hours | The time between rollover checks for the audit history index.
|
||||
`plugins.index_state_management.history.rollover_retention_period` | 30 days | How long audit history indices are kept.
|
||||
`plugins.index_state_management.allow_list` | All actions | List of actions that you can use.
|
||||
|
||||
|
||||
## Audit history indices
|
||||
|
||||
If you don't want to disable ISM audit history or shorten the retention period, you can create an [index template](../../../opensearch/index-templates/) to reduce the shard count of the history indices:
|
||||
|
||||
```json
|
||||
PUT _index_template/ism_history_indices
|
||||
{
|
||||
"index_patterns": [
|
||||
".opendistro-ism-managed-index-history-*"
|
||||
],
|
||||
"template": {
|
||||
"settings": {
|
||||
"number_of_shards": 1,
|
||||
"number_of_replicas": 0
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
|
@ -0,0 +1,40 @@
|
|||
---
|
||||
layout: default
|
||||
title: Refresh Search Analyzer
|
||||
nav_order: 40
|
||||
parent: Index management
|
||||
has_children: false
|
||||
redirect_from: /docs/ism/refresh-analyzer/
|
||||
has_toc: false
|
||||
---
|
||||
|
||||
# Refresh search analyzer
|
||||
|
||||
With ISM installed, you can refresh search analyzers in real time with the following API:
|
||||
|
||||
```json
|
||||
POST /_plugins/_refresh_search_analyzers/<index or alias or wildcard>
|
||||
```
|
||||
For example, if you change the synonym list in your analyzer, the change takes effect without you needing to close and reopen the index.
|
||||
|
||||
For the refresh to work, the token filter must have an `updateable` flag of `true`:
|
||||
|
||||
```json
|
||||
{
|
||||
"analyzer": {
|
||||
"my_synonyms": {
|
||||
"tokenizer": "whitespace",
|
||||
"filter": [
|
||||
"synonym"
|
||||
]
|
||||
}
|
||||
},
|
||||
"filter": {
|
||||
"synonym": {
|
||||
"type": "synonym_graph",
|
||||
"synonyms_path": "synonyms.txt",
|
||||
"updateable": true
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
|
@ -0,0 +1,153 @@
|
|||
---
|
||||
layout: default
|
||||
title: API
|
||||
nav_order: 4
|
||||
parent: k-NN
|
||||
has_children: false
|
||||
---
|
||||
|
||||
# k-NN plugin API
|
||||
|
||||
The k-NN plugin adds two API operations to help you better manage the plugin's functionality.
|
||||
|
||||
|
||||
## Stats
|
||||
|
||||
The k-NN `stats` API provides information about the current status of the k-NN plugin. The plugin keeps track of both cluster-level and node-level statistics. Cluster-level statistics have a single value for the entire cluster. Node-level statistics have a single value for each node in the cluster. You can filter the query by `nodeId` and `statName`:
|
||||
```
|
||||
GET /_plugins/_knn/nodeId1,nodeId2/stats/statName1,statName2
|
||||
```
|
||||
|
||||
Statistic | Description
|
||||
:--- | :---
|
||||
`circuit_breaker_triggered` | Indicates whether the circuit breaker is triggered. This statistic is only relevant to approximate k-NN search.
|
||||
`total_load_time` | The time in nanoseconds that k-NN has taken to load graphs into the cache. This statistic is only relevant to approximate k-NN search.
|
||||
`eviction_count` | The number of graphs that have been evicted from the cache due to memory constraints or idle time. This statistic is only relevant to approximate k-NN search. <br /> **Note**: Explicit evictions that occur because of index deletion aren't counted.
|
||||
`hit_count` | The number of cache hits. A cache hit occurs when a user queries a graph that's already loaded into memory. This statistic is only relevant to approximate k-NN search.
|
||||
`miss_count` | The number of cache misses. A cache miss occurs when a user queries a graph that isn't loaded into memory yet. This statistic is only relevant to approximate k-NN search.
|
||||
`graph_memory_usage` | Current cache size (total size of all graphs in memory) in kilobytes. This statistic is only relevant to approximate k-NN search.
|
||||
`graph_memory_usage_percentage` | The current weight of the cache as a percentage of the maximum cache capacity.
|
||||
`graph_index_requests` | The number of requests to add the `knn_vector` field of a document into a graph.
|
||||
`graph_index_errors` | The number of requests to add the `knn_vector` field of a document into a graph that have produced an error.
|
||||
`graph_query_requests` | The number of graph queries that have been made.
|
||||
`graph_query_errors` | The number of graph queries that have produced an error.
|
||||
`knn_query_requests` | The number of k-NN query requests received.
|
||||
`cache_capacity_reached` | Whether `knn.memory.circuit_breaker.limit` has been reached. This statistic is only relevant to approximate k-NN search.
|
||||
`load_success_count` | The number of times k-NN successfully loaded a graph into the cache. This statistic is only relevant to approximate k-NN search.
|
||||
`load_exception_count` | The number of times an exception occurred when trying to load a graph into the cache. This statistic is only relevant to approximate k-NN search.
|
||||
`indices_in_cache` | For each index that has graphs in the cache, this statistic provides the number of graphs that index has and the total `graph_memory_usage` that index is using, in kilobytes.
|
||||
`script_compilations` | The number of times the k-NN script has been compiled. This value should usually be 1 or 0, but if the cache containing the compiled scripts is filled, the k-NN script might be recompiled. This statistic is only relevant to k-NN score script search.
|
||||
`script_compilation_errors` | The number of errors during script compilation. This statistic is only relevant to k-NN score script search.
|
||||
`script_query_requests` | The total number of script queries. This statistic is only relevant to k-NN score script search.
|
||||
`script_query_errors` | The number of errors during script queries. This statistic is only relevant to k-NN score script search.
|
||||
|
||||
|
||||
### Usage
|
||||
|
||||
```json
|
||||
GET /_plugins/_knn/stats?pretty
|
||||
{
|
||||
"_nodes" : {
|
||||
"total" : 1,
|
||||
"successful" : 1,
|
||||
"failed" : 0
|
||||
},
|
||||
"cluster_name" : "_run",
|
||||
"circuit_breaker_triggered" : false,
|
||||
"nodes" : {
|
||||
"HYMrXXsBSamUkcAjhjeN0w" : {
|
||||
"eviction_count" : 0,
|
||||
"miss_count" : 1,
|
||||
"graph_memory_usage" : 1,
|
||||
"graph_memory_usage_percentage" : 3.68,
|
||||
"graph_index_requests" : 7,
|
||||
"graph_index_errors" : 1,
|
||||
"knn_query_requests" : 4,
|
||||
"graph_query_requests" : 30,
|
||||
"graph_query_errors" : 15,
|
||||
"indices_in_cache" : {
|
||||
"myindex" : {
|
||||
"graph_memory_usage" : 2,
|
||||
"graph_memory_usage_percentage" : 3.68,
|
||||
"graph_count" : 2
|
||||
}
|
||||
},
|
||||
"cache_capacity_reached" : false,
|
||||
"load_exception_count" : 0,
|
||||
"hit_count" : 0,
|
||||
"load_success_count" : 1,
|
||||
"total_load_time" : 2878745,
|
||||
"script_compilations" : 1,
|
||||
"script_compilation_errors" : 0,
|
||||
"script_query_requests" : 534,
|
||||
"script_query_errors" : 0
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
```json
|
||||
GET /_plugins/_knn/HYMrXXsBSamUkcAjhjeN0w/stats/circuit_breaker_triggered,graph_memory_usage?pretty
|
||||
{
|
||||
"_nodes" : {
|
||||
"total" : 1,
|
||||
"successful" : 1,
|
||||
"failed" : 0
|
||||
},
|
||||
"cluster_name" : "_run",
|
||||
"circuit_breaker_triggered" : false,
|
||||
"nodes" : {
|
||||
"HYMrXXsBSamUkcAjhjeN0w" : {
|
||||
"graph_memory_usage" : 1
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
## Warmup operation
|
||||
|
||||
The Hierarchical Navigable Small World (HNSW) graphs used to perform an approximate k-Nearest Neighbor (k-NN) search are stored as `.hnsw` files with other Apache Lucene segment files. In order for you to perform a search on these graphs using the k-NN plugin, the plugin needs to load these files into native memory.
|
||||
|
||||
If the plugin hasn't loaded the graphs into native memory, it loads them when it receives a search request. The loading time can cause high latency during initial queries. To avoid this situation, users often run random queries during a warmup period. After this warmup period, the graphs are loaded into native memory and their production workloads can begin. This loading process is indirect and requires extra effort.
|
||||
|
||||
As an alternative, you can avoid this latency issue by running the k-NN plugin warmup API operation on whatever indices you're interested in searching. This operation loads all the graphs for all of the shards (primaries and replicas) of all the indices specified in the request into native memory.
|
||||
|
||||
After the process finishes, you can start searching against the indices with no initial latency penalties. The warmup API operation is idempotent, so if a segment's graphs are already loaded into memory, this operation has no impact on those graphs. It only loads graphs that aren't currently in memory.
|
||||
|
||||
|
||||
### Usage
|
||||
|
||||
This request performs a warmup on three indices:
|
||||
|
||||
```json
|
||||
GET /_plugins/_knn/warmup/index1,index2,index3?pretty
|
||||
{
|
||||
"_shards" : {
|
||||
"total" : 6,
|
||||
"successful" : 6,
|
||||
"failed" : 0
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
`total` indicates how many shards the k-NN plugin attempted to warm up. The response also includes the number of shards the plugin succeeded and failed to warm up.
|
||||
|
||||
The call doesn't return results until the warmup operation finishes or the request times out. If the request times out, the operation still continues on the cluster. To monitor the warmup operation, use the OpenSearch `_tasks` API:
|
||||
|
||||
```json
|
||||
GET /_tasks
|
||||
```
|
||||
|
||||
After the operation has finished, use the [k-NN `stats` API operation](#stats) to see what the k-NN plugin loaded into the graph.
|
||||
|
||||
|
||||
### Best practices
|
||||
|
||||
For the warmup operation to function properly, follow these best practices:
|
||||
|
||||
* Don't run merge operations on indices that you want to warm up. During merge, the k-NN plugin creates new segments, and old segments are sometimes deleted. For example, you could encounter a situation in which the warmup API operation loads graphs A and B into native memory, but segment C is created from segments A and B being merged. The graphs for A and B would no longer be in memory, and graph C would also not be in memory. In this case, the initial penalty for loading graph C is still present.
|
||||
|
||||
* Confirm that all graphs you want to warm up can fit into native memory. For more information about the native memory limit, see the [`knn.memory.circuit_breaker.limit` setting](../settings/#cluster-settings). High graph memory usage causes cache thrashing, which can lead to operations constantly failing and attempting to run again.
|
||||
|
||||
* Don't index any documents that you want to load into the cache. Writing new information to segments prevents the warmup API operation from loading the graphs until they're searchable. This means that you would have to run the warmup operation again after indexing finishes.
|
|
@ -0,0 +1,162 @@
|
|||
---
|
||||
layout: default
|
||||
title: Approximate search
|
||||
nav_order: 1
|
||||
parent: k-NN
|
||||
has_children: false
|
||||
has_math: true
|
||||
---
|
||||
|
||||
# Approximate k-NN search
|
||||
|
||||
The approximate k-NN method uses [nmslib's](https://github.com/nmslib/nmslib/) implementation of the Hierarchical Navigable Small World (HNSW) algorithm to power k-NN search. In this case, approximate means that for a given search, the neighbors returned are an estimate of the true k-nearest neighbors. Of the three methods, this method offers the best search scalability for large data sets. Generally speaking, once the data set gets into the hundreds of thousands of vectors, this approach is preferred.
|
||||
|
||||
The k-NN plugin builds an HNSW graph of the vectors for each `knn_vector` field/Lucene segment pair during indexing, which can be used to efficiently find the k-nearest neighbors to a query vector during search. To learn more about Lucene segments, please refer to [Apache Lucene's documentation](https://lucene.apache.org/core/8_7_0/core/org/apache/lucene/codecs/lucene87/package-summary.html#package.description). These graphs are loaded into native memory during search and managed by a cache. To learn more about pre-loading graphs into memory, refer to the [warmup API](../api#warmup-operation). To see which graphs are already loaded in memory, use the [stats API](../api#stats).
|
||||
|
||||
Because the graphs are constructed during indexing, it is not possible to apply a filter on an index and then use this search method. All filters are applied on the results produced by the approximate nearest neighbor search.
|
||||
|
||||
## Get started with approximate k-NN
|
||||
|
||||
To use the k-NN plugin's approximate search functionality, you must first create a k-NN index and set `index.knn` to `true`. This setting tells the plugin to create HNSW graphs for the index.
|
||||
|
||||
Additionally, if you're using the approximate k-nearest neighbor method, set `index.knn.space_type` to the space you're interested in. You can't change this setting after it's set. To see what spaces we support, see [spaces](#spaces). By default, `index.knn.space_type` is `l2`. For more information about index settings, such as algorithm parameters you can tweak to tune performance, see [Index settings](../settings#index-settings).
|
||||
|
||||
Next, you must add one or more fields of the `knn_vector` data type. This example creates an index with two `knn_vector` fields and uses cosine similarity:
|
||||
|
||||
```json
|
||||
PUT my-knn-index-1
|
||||
{
|
||||
"settings": {
|
||||
"index": {
|
||||
"knn": true,
|
||||
"knn.space_type": "cosinesimil"
|
||||
}
|
||||
},
|
||||
"mappings": {
|
||||
"properties": {
|
||||
"my_vector1": {
|
||||
"type": "knn_vector",
|
||||
"dimension": 2
|
||||
},
|
||||
"my_vector2": {
|
||||
"type": "knn_vector",
|
||||
"dimension": 4
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
The `knn_vector` data type supports a vector of floats that can have a dimension of up to 10,000, as set by the dimension mapping parameter.
|
||||
|
||||
In OpenSearch, codecs handle the storage and retrieval of indices. The k-NN plugin uses a custom codec to write vector data to graphs so that the underlying k-NN search library can read it.
|
||||
{: .tip }
|
||||
|
||||
After you create the index, you can add some data to it:
|
||||
|
||||
```json
|
||||
POST _bulk
|
||||
{ "index": { "_index": "my-knn-index-1", "_id": "1" } }
|
||||
{ "my_vector1": [1.5, 2.5], "price": 12.2 }
|
||||
{ "index": { "_index": "my-knn-index-1", "_id": "2" } }
|
||||
{ "my_vector1": [2.5, 3.5], "price": 7.1 }
|
||||
{ "index": { "_index": "my-knn-index-1", "_id": "3" } }
|
||||
{ "my_vector1": [3.5, 4.5], "price": 12.9 }
|
||||
{ "index": { "_index": "my-knn-index-1", "_id": "4" } }
|
||||
{ "my_vector1": [5.5, 6.5], "price": 1.2 }
|
||||
{ "index": { "_index": "my-knn-index-1", "_id": "5" } }
|
||||
{ "my_vector1": [4.5, 5.5], "price": 3.7 }
|
||||
{ "index": { "_index": "my-knn-index-1", "_id": "6" } }
|
||||
{ "my_vector2": [1.5, 5.5, 4.5, 6.4], "price": 10.3 }
|
||||
{ "index": { "_index": "my-knn-index-1", "_id": "7" } }
|
||||
{ "my_vector2": [2.5, 3.5, 5.6, 6.7], "price": 5.5 }
|
||||
{ "index": { "_index": "my-knn-index-1", "_id": "8" } }
|
||||
{ "my_vector2": [4.5, 5.5, 6.7, 3.7], "price": 4.4 }
|
||||
{ "index": { "_index": "my-knn-index-1", "_id": "9" } }
|
||||
{ "my_vector2": [1.5, 5.5, 4.5, 6.4], "price": 8.9 }
|
||||
|
||||
```
|
||||
|
||||
Then you can execute an approximate nearest neighbor search on the data using the `knn` query type:
|
||||
|
||||
```json
|
||||
GET my-knn-index-1/_search
|
||||
{
|
||||
"size": 2,
|
||||
"query": {
|
||||
"knn": {
|
||||
"my_vector2": {
|
||||
"vector": [2, 3, 5, 6],
|
||||
"k": 2
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
`k` is the number of neighbors the search of each graph will return. You must also include the `size` option, which indicates how many results the query actually returns. The plugin returns `k` amount of results for each shard (and each segment) and `size` amount of results for the entire query. The plugin supports a maximum `k` value of 10,000.
|
||||
|
||||
### Using approximate k-NN with filters
|
||||
If you use the `knn` query alongside filters or other clauses (e.g. `bool`, `must`, `match`), you might receive fewer than `k` results. In this example, `post_filter` reduces the number of results from 2 to 1:
|
||||
|
||||
```json
|
||||
GET my-knn-index-1/_search
|
||||
{
|
||||
"size": 2,
|
||||
"query": {
|
||||
"knn": {
|
||||
"my_vector2": {
|
||||
"vector": [2, 3, 5, 6],
|
||||
"k": 2
|
||||
}
|
||||
}
|
||||
},
|
||||
"post_filter": {
|
||||
"range": {
|
||||
"price": {
|
||||
"gte": 5,
|
||||
"lte": 10
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Spaces
|
||||
|
||||
A space corresponds to the function used to measure the distance between two points in order to determine the k-nearest neighbors. From the k-NN perspective, a lower score equates to a closer and better result. This is the opposite of how OpenSearch scores results, where a greater score equates to a better result. To convert distances to OpenSearch scores, we take 1 / (1 + distance). Currently, the k-NN plugin supports the following spaces:
|
||||
|
||||
<table>
|
||||
<thead style="text-align: left">
|
||||
<tr>
|
||||
<th>spaceType</th>
|
||||
<th>Distance Function</th>
|
||||
<th>OpenSearch Score</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tr>
|
||||
<td>l2</td>
|
||||
<td>\[ Distance(X, Y) = \sum_{i=1}^n (X_i - Y_i)^2 \]</td>
|
||||
<td>1 / (1 + Distance Function)</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>l1</td>
|
||||
<td>\[ Distance(X, Y) = \sum_{i=1}^n \lvert X_i - Y_i \rvert \]</td>
|
||||
<td>1 / (1 + Distance Function)</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>cosinesimil</td>
|
||||
<td>\[ 1 - {A · B \over \|A\| · \|B\|} = 1 -
|
||||
{\sum_{i=1}^n (A_i · B_i) \over \sqrt{\sum_{i=1}^n A_i^2} · \sqrt{\sum_{i=1}^n B_i^2}}\]
|
||||
where \(\|A\|\) and \(\|B\|\) represent the norms (magnitudes) of vectors A and B, respectively.</td>
|
||||
<td>1 / (1 + Distance Function)</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>hammingbit</td>
|
||||
<td style="text-align:center">Distance = countSetBits(X \(\oplus\) Y)</td>
|
||||
<td>1 / (1 + Distance Function)</td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
The cosine similarity formula does not include the `1 -` prefix. However, because nmslib equates smaller scores with closer results, it returns `1 - cosineSimilarity` for its cosine similarity space---that's why `1 -` is included in the distance function.
|
||||
{: .note }
|
|
@ -0,0 +1,42 @@
|
|||
---
|
||||
layout: default
|
||||
title: k-NN
|
||||
nav_order: 50
|
||||
has_children: true
|
||||
has_toc: false
|
||||
---
|
||||
|
||||
# k-NN
|
||||
|
||||
Short for *k-nearest neighbors*, the k-NN plugin enables users to search for the k-nearest neighbors to a query point across an index of vectors. To determine the neighbors, you can specify the space (the distance function) you want to use to measure the distance between points.
|
||||
|
||||
Use cases include recommendations (for example, an "other songs you might like" feature in a music application), image recognition, and fraud detection. For more background information on k-NN search, see [Wikipedia](https://en.wikipedia.org/wiki/Nearest_neighbor_search).
|
||||
|
||||
This plugin supports three different methods for obtaining the k-nearest neighbors from an index of vectors:
|
||||
|
||||
1. **Approximate k-NN**
|
||||
|
||||
The first method takes an approximate nearest neighbor approach---it uses the HNSW algorithm to return the approximate k-nearest neighbors to a query vector. This algorithm sacrifices indexing speed and search accuracy in return for lower latency and more scalable search. To learn more about the algorithm, please refer to [nmslib's documentation](https://github.com/nmslib/nmslib/) or [the paper introducing the algorithm](https://arxiv.org/abs/1603.09320).
|
||||
|
||||
Approximate k-NN is the best choice for searches over large indices (i.e. hundreds of thousands of vectors or more) that require low latency. You should not use approximate k-NN if you want to apply a filter on the index before the k-NN search, which greatly reduces the number of vectors to be searched. In this case, you should use either the script scoring method or painless extensions.
|
||||
|
||||
For more details about this method, see [Approximate k-NN search](approximate-knn).
|
||||
|
||||
2. **Script Score k-NN**
|
||||
|
||||
The second method extends OpenSearch's script scoring functionality to execute a brute force, exact k-NN search over "knn_vector" fields or fields that can represent binary objects. With this approach, you can run k-NN search on a subset of vectors in your index (sometimes referred to as a pre-filter search).
|
||||
|
||||
Use this approach for searches over smaller bodies of documents or when a pre-filter is needed. Using this approach on large indices may lead to high latencies.
|
||||
|
||||
For more details about this method, see [Exact k-NN with scoring script](knn-score-script).
|
||||
|
||||
3. **Painless extensions**
|
||||
|
||||
The third method adds the distance functions as painless extensions that you can use in more complex combinations. Similar to the k-NN Script Score, you can use this method to perform a brute force, exact k-NN search across an index, which also supports pre-filtering.
|
||||
|
||||
This approach has slightly slower query performance compared to the k-NN Script Score. If your use case requires more customization over the final score, you should use this approach over Script Score k-NN.
|
||||
|
||||
For more details about this method, see [Painless scripting functions](painless-functions).
|
||||
|
||||
|
||||
Overall, for larger data sets, you should generally choose the approximate nearest neighbor method because it scales significantly better. For smaller data sets, where you may want to apply a filter, you should choose the custom scoring approach. If you have a more complex use case where you need to use a distance function as part of your scoring method, you should use the painless scripting approach.
|
|
@ -0,0 +1,12 @@
|
|||
---
|
||||
layout: default
|
||||
title: JNI library
|
||||
nav_order: 5
|
||||
parent: k-NN
|
||||
has_children: false
|
||||
---
|
||||
|
||||
# JNI library
|
||||
To integrate [nmslib's](https://github.com/nmslib/nmslib/) approximate k-NN functionality (implemented in C++) into the k-NN plugin (implemented in Java), we created a Java Native Interface library, which lets the k-NN plugin leverage nmslib's functionality. To see how we build the JNI library binary and learn how to get the most out of it in your production environment, see [JNI Library Artifacts](https://github.com/opensearch-project/k-NN#jni-library-artifacts).
|
||||
|
||||
For more information about JNI, see [Java Native Interface](https://en.wikipedia.org/wiki/Java_Native_Interface) on Wikipedia.
|
|
@ -0,0 +1,316 @@
|
|||
---
|
||||
layout: default
|
||||
title: Exact k-NN with scoring script
|
||||
nav_order: 2
|
||||
parent: k-NN
|
||||
has_children: false
|
||||
has_math: true
|
||||
---
|
||||
|
||||
# Exact k-NN with scoring script
|
||||
The k-NN plugin implements the OpenSearch score script plugin that you can use to find the exact k-nearest neighbors to a given query point. Using the k-NN score script, you can apply a filter on an index before executing the nearest neighbor search. This is useful for dynamic search cases where the index body may vary based on other conditions.
|
||||
|
||||
Because the score script approach executes a brute force search, it doesn't scale as well as the [approximate approach](../approximate-knn). In some cases, it might be better to think about refactoring your workflow or index structure to use the approximate approach instead of the score script approach.
|
||||
|
||||
## Getting started with the score script for vectors
|
||||
|
||||
Similar to approximate nearest neighbor search, in order to use the score script on a body of vectors, you must first create an index with one or more `knn_vector` fields.
|
||||
|
||||
If you intend to just use the score script approach (and not the approximate approach) you can set `index.knn` to `false` and not set `index.knn.space_type`. You can choose the space type during search. See [spaces](#spaces) for the spaces the k-NN score script supports.
|
||||
|
||||
This example creates an index with two `knn_vector` fields:
|
||||
|
||||
```json
|
||||
PUT my-knn-index-1
|
||||
{
|
||||
"mappings": {
|
||||
"properties": {
|
||||
"my_vector1": {
|
||||
"type": "knn_vector",
|
||||
"dimension": 2
|
||||
},
|
||||
"my_vector2": {
|
||||
"type": "knn_vector",
|
||||
"dimension": 4
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
If you *only* want to use the score script, you can omit `"index.knn": true`. The benefit of this approach is faster indexing speed and lower memory usage, but you lose the ability to perform standard k-NN queries on the index.
|
||||
{: .tip}
|
||||
|
||||
After you create the index, you can add some data to it:
|
||||
|
||||
```json
|
||||
POST _bulk
|
||||
{ "index": { "_index": "my-knn-index-1", "_id": "1" } }
|
||||
{ "my_vector1": [1.5, 2.5], "price": 12.2 }
|
||||
{ "index": { "_index": "my-knn-index-1", "_id": "2" } }
|
||||
{ "my_vector1": [2.5, 3.5], "price": 7.1 }
|
||||
{ "index": { "_index": "my-knn-index-1", "_id": "3" } }
|
||||
{ "my_vector1": [3.5, 4.5], "price": 12.9 }
|
||||
{ "index": { "_index": "my-knn-index-1", "_id": "4" } }
|
||||
{ "my_vector1": [5.5, 6.5], "price": 1.2 }
|
||||
{ "index": { "_index": "my-knn-index-1", "_id": "5" } }
|
||||
{ "my_vector1": [4.5, 5.5], "price": 3.7 }
|
||||
{ "index": { "_index": "my-knn-index-1", "_id": "6" } }
|
||||
{ "my_vector2": [1.5, 5.5, 4.5, 6.4], "price": 10.3 }
|
||||
{ "index": { "_index": "my-knn-index-1", "_id": "7" } }
|
||||
{ "my_vector2": [2.5, 3.5, 5.6, 6.7], "price": 5.5 }
|
||||
{ "index": { "_index": "my-knn-index-1", "_id": "8" } }
|
||||
{ "my_vector2": [4.5, 5.5, 6.7, 3.7], "price": 4.4 }
|
||||
{ "index": { "_index": "my-knn-index-1", "_id": "9" } }
|
||||
{ "my_vector2": [1.5, 5.5, 4.5, 6.4], "price": 8.9 }
|
||||
|
||||
```
|
||||
|
||||
Finally, you can execute an exact nearest neighbor search on the data using the `knn` script:
|
||||
```json
|
||||
GET my-knn-index-1/_search
|
||||
{
|
||||
"size": 4,
|
||||
"query": {
|
||||
"script_score": {
|
||||
"query": {
|
||||
"match_all": {}
|
||||
},
|
||||
"script": {
|
||||
"source": "knn_score",
|
||||
"lang": "knn",
|
||||
"params": {
|
||||
"field": "my_vector2",
|
||||
"query_value": [2.0, 3.0, 5.0, 6.0],
|
||||
"space_type": "cosinesimil"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
All parameters are required.
|
||||
|
||||
- `lang` is the script type. This value is usually `painless`, but here you must specify `knn`.
|
||||
- `source` is the name of the script, `knn_score`.
|
||||
|
||||
This script is part of the k-NN plugin and isn't available at the standard `_scripts` path. A GET request to `_cluster/state/metadata` doesn't return it, either.
|
||||
|
||||
- `field` is the field that contains your vector data.
|
||||
- `query_value` is the point you want to find the nearest neighbors for. For the Euclidean and cosine similarity spaces, the value must be an array of floats that matches the dimension set in the field's mapping. For Hamming bit distance, this value can be either of type signed long or a base64-encoded string (for the long and binary field types, respectively).
|
||||
- `space_type` corresponds to the distance function. See the [spaces section](#spaces).
|
||||
|
||||
The [post filter example in the approximate approach](../approximate-knn/#using-approximate-k-nn-with-filters) shows a search that returns fewer than `k` results. If you want to avoid this situation, the score script method lets you essentially invert the order of events. In other words, you can filter down the set of documents over which to execute the k-nearest neighbor search.
|
||||
|
||||
This example shows a pre-filter approach to k-NN search with the score script approach. First, create the index:
|
||||
|
||||
```json
|
||||
PUT my-knn-index-2
|
||||
{
|
||||
"mappings": {
|
||||
"properties": {
|
||||
"my_vector": {
|
||||
"type": "knn_vector",
|
||||
"dimension": 2
|
||||
},
|
||||
"color": {
|
||||
"type": "keyword"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Then add some documents:
|
||||
|
||||
```json
|
||||
POST _bulk
|
||||
{ "index": { "_index": "my-knn-index-2", "_id": "1" } }
|
||||
{ "my_vector": [1, 1], "color" : "RED" }
|
||||
{ "index": { "_index": "my-knn-index-2", "_id": "2" } }
|
||||
{ "my_vector": [2, 2], "color" : "RED" }
|
||||
{ "index": { "_index": "my-knn-index-2", "_id": "3" } }
|
||||
{ "my_vector": [3, 3], "color" : "RED" }
|
||||
{ "index": { "_index": "my-knn-index-2", "_id": "4" } }
|
||||
{ "my_vector": [10, 10], "color" : "BLUE" }
|
||||
{ "index": { "_index": "my-knn-index-2", "_id": "5" } }
|
||||
{ "my_vector": [20, 20], "color" : "BLUE" }
|
||||
{ "index": { "_index": "my-knn-index-2", "_id": "6" } }
|
||||
{ "my_vector": [30, 30], "color" : "BLUE" }
|
||||
|
||||
```
|
||||
|
||||
Finally, use the `script_score` query to pre-filter your documents before identifying nearest neighbors:
|
||||
|
||||
```json
|
||||
GET my-knn-index-2/_search
|
||||
{
|
||||
"size": 2,
|
||||
"query": {
|
||||
"script_score": {
|
||||
"query": {
|
||||
"bool": {
|
||||
"filter": {
|
||||
"term": {
|
||||
"color": "BLUE"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"script": {
|
||||
"lang": "knn",
|
||||
"source": "knn_score",
|
||||
"params": {
|
||||
"field": "my_vector",
|
||||
"query_value": [9.9, 9.9],
|
||||
"space_type": "l2"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Getting started with the score script for binary data
|
||||
The k-NN score script also allows you to run k-NN search on your binary data with the Hamming distance space.
|
||||
In order to use Hamming distance, the field of interest must have either a `binary` or `long` field type. If you're using `binary` type, the data must be a base64-encoded string.
|
||||
|
||||
This example shows how to use the Hamming distance space with a `binary` field type:
|
||||
|
||||
```json
|
||||
PUT my-index
|
||||
{
|
||||
"mappings": {
|
||||
"properties": {
|
||||
"my_binary": {
|
||||
"type": "binary",
|
||||
"doc_values": true
|
||||
},
|
||||
"color": {
|
||||
"type": "keyword"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Then add some documents:
|
||||
|
||||
```json
|
||||
POST _bulk
|
||||
{ "index": { "_index": "my-index", "_id": "1" } }
|
||||
{ "my_binary": "SGVsbG8gV29ybGQh", "color" : "RED" }
|
||||
{ "index": { "_index": "my-index", "_id": "2" } }
|
||||
{ "my_binary": "ay1OTiBjdXN0b20gc2NvcmluZyE=", "color" : "RED" }
|
||||
{ "index": { "_index": "my-index", "_id": "3" } }
|
||||
{ "my_binary": "V2VsY29tZSB0byBrLU5O", "color" : "RED" }
|
||||
{ "index": { "_index": "my-index", "_id": "4" } }
|
||||
{ "my_binary": "SSBob3BlIHRoaXMgaXMgaGVscGZ1bA==", "color" : "BLUE" }
|
||||
{ "index": { "_index": "my-index", "_id": "5" } }
|
||||
{ "my_binary": "QSBjb3VwbGUgbW9yZSBkb2NzLi4u", "color" : "BLUE" }
|
||||
{ "index": { "_index": "my-index", "_id": "6" } }
|
||||
{ "my_binary": "TGFzdCBvbmUh", "color" : "BLUE" }
|
||||
|
||||
```
|
||||
|
||||
Finally, use the `script_score` query to pre-filter your documents before identifying nearest neighbors:
|
||||
|
||||
```json
|
||||
GET my-index/_search
|
||||
{
|
||||
"size": 2,
|
||||
"query": {
|
||||
"script_score": {
|
||||
"query": {
|
||||
"bool": {
|
||||
"filter": {
|
||||
"term": {
|
||||
"color": "BLUE"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"script": {
|
||||
"lang": "knn",
|
||||
"source": "knn_score",
|
||||
"params": {
|
||||
"field": "my_binary",
|
||||
"query_value": "U29tZXRoaW5nIEltIGxvb2tpbmcgZm9y",
|
||||
"space_type": "hammingbit"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Similarly, you can encode your data with the `long` field and run a search:
|
||||
|
||||
```json
|
||||
GET my-long-index/_search
|
||||
{
|
||||
"size": 2,
|
||||
"query": {
|
||||
"script_score": {
|
||||
"query": {
|
||||
"bool": {
|
||||
"filter": {
|
||||
"term": {
|
||||
"color": "BLUE"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"script": {
|
||||
"lang": "knn",
|
||||
"source": "knn_score",
|
||||
"params": {
|
||||
"field": "my_long",
|
||||
"query_value": 23,
|
||||
"space_type": "hammingbit"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Spaces
|
||||
|
||||
A space corresponds to the function used to measure the distance between two points in order to determine the k-nearest neighbors. From the k-NN perspective, a lower score equates to a closer and better result. This is the opposite of how OpenSearch scores results, where a greater score equates to a better result. The following table illustrates how OpenSearch converts spaces to scores:
|
||||
|
||||
<table>
|
||||
<thead style="text-align: left">
|
||||
<tr>
|
||||
<th>spaceType</th>
|
||||
<th>Distance Function</th>
|
||||
<th>OpenSearch Score</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tr>
|
||||
<td>l2</td>
|
||||
<td>\[ Distance(X, Y) = \sum_{i=1}^n (X_i - Y_i)^2 \]</td>
|
||||
<td>1 / (1 + Distance Function)</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>l1</td>
|
||||
<td>\[ Distance(X, Y) = \sum_{i=1}^n |X_i - Y_i| \]</td>
|
||||
<td>1 / (1 + Distance Function)</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>cosinesimil</td>
|
||||
<td>\[ {A · B \over \|A\| · \|B\|} =
|
||||
{\sum_{i=1}^n (A_i · B_i) \over \sqrt{\sum_{i=1}^n A_i^2} · \sqrt{\sum_{i=1}^n B_i^2}}\]
|
||||
where \(\|A\|\) and \(\|B\|\) represent the norms (magnitudes) of vectors A and B.</td>
|
||||
<td>1 + Distance Function</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>hammingbit</td>
|
||||
<td style="text-align:center">Distance = countSetBits(X \(\oplus\) Y)</td>
|
||||
<td> 1 / (1 + Distance Function)</td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
|
||||
Cosine similarity returns a number between -1 and 1, and because OpenSearch relevance scores can't be below 0, the k-NN plugin adds 1 to get the final score.
|
|
@ -0,0 +1,63 @@
|
|||
---
|
||||
layout: default
|
||||
title: k-NN Painless extensions
|
||||
nav_order: 3
|
||||
parent: k-NN
|
||||
has_children: false
|
||||
has_math: true
|
||||
---
|
||||
|
||||
# k-NN Painless Scripting extensions
|
||||
|
||||
With the k-NN plugin's Painless Scripting extensions, you can use k-NN distance functions directly in your Painless scripts to perform operations on `knn_vector` fields. Painless has a strict list of allowed functions and classes per context to ensure its scripts are secure. The k-NN plugin adds Painless Scripting extensions to a few of the distance functions used in [k-NN score script](../knn-score-script), so you can use them to customize your k-NN workload.
|
||||
|
||||
## Get started with k-NN's Painless Scripting functions
|
||||
|
||||
To use k-NN's Painless Scripting functions, first create an index with `knn_vector` fields like in [k-NN score script](../knn-score-script#getting-started-with-the-score-script). Once the index is created and you ingest some data, you can use the Painless extensions:
|
||||
|
||||
```json
|
||||
GET my-knn-index-2/_search
|
||||
{
|
||||
"size": 2,
|
||||
"query": {
|
||||
"script_score": {
|
||||
"query": {
|
||||
"bool": {
|
||||
"filter": {
|
||||
"term": {
|
||||
"color": "BLUE"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"script": {
|
||||
"source": "1.0 + cosineSimilarity(params.query_value, doc[params.field])",
|
||||
"params": {
|
||||
"field": "my_vector",
|
||||
"query_value": [9.9, 9.9]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
`field` needs to map to a `knn_vector` field, and `query_value` needs to be a floating point array with the same dimension as `field`.
|
||||
|
||||
## Function types
|
||||
The following table describes the available painless functions the k-NN plugin provides:
|
||||
|
||||
Function name | Function signature | Description
|
||||
:--- | :--- | :---
|
||||
l2Squared | `float l2Squared (float[] queryVector, doc['vector field'])` | This function calculates the square of the L2 distance (Euclidean distance) between a given query vector and document vectors. The shorter the distance, the more relevant the document is, so this example inverts the return value of the l2Squared function. If the document vector matches the query vector, the result is 0, so this example also adds 1 to the distance to avoid divide by zero errors.
|
||||
l1Norm | `float l1Norm (float[] queryVector, doc['vector field'])` | This function calculates the L1 norm distance (Manhattan distance) between a given query vector and document vectors. The shorter the distance, the more relevant the document is, so you typically invert the return value of the l1Norm function. If the document vector matches the query vector, the result is 0, so you should also add 1 to the distance to avoid divide by zero errors.
|
||||
cosineSimilarity | `float cosineSimilarity (float[] queryVector, doc['vector field'])` | Cosine similarity is an inner product of the query vector and document vector normalized to both have a length of 1. If the magnitude of the query vector doesn't change throughout the query, you can pass the magnitude of the query vector to improve performance, instead of calculating the magnitude every time for every filtered document:<br /> `float cosineSimilarity (float[] queryVector, doc['vector field'], float normQueryVector)` <br />In general, the range of cosine similarity is [-1, 1]. However, in the case of information retrieval, the cosine similarity of two documents ranges from 0 to 1 because the tf-idf statistic can't be negative. Therefore, the k-NN plugin adds 1.0 in order to always yield a positive cosine similarity score.
|
||||
|
||||
## Constraints
|
||||
1. If a document’s `knn_vector` field has different dimensions than the query, the function throws an `IllegalArgumentException`.
|
||||
2. If a vector field doesn't have a value, the function throws an <code>IllegalStateException</code>.
|
||||
You can avoid this situation by first checking if a document has a value in its field:
|
||||
```
|
||||
"source": "doc[params.field].size() == 0 ? 0 : 1 / (1 + l2Squared(params.query_value, doc[params.field]))",
|
||||
```
|
||||
Because scores can only be positive, this script ranks documents with vector fields higher than those without.
|
|
@ -0,0 +1,111 @@
|
|||
---
|
||||
layout: default
|
||||
title: Performance tuning
|
||||
parent: k-NN
|
||||
nav_order: 7
|
||||
---
|
||||
|
||||
# Performance tuning
|
||||
|
||||
This topic provides performance tuning recommendations to improve indexing and search performance for approximate k-NN. From a high level, k-NN works according to these principles:
|
||||
* Graphs are created per knn_vector field / (Lucene) segment pair.
|
||||
* Queries execute on segments sequentially inside the shard (same as any other OpenSearch query).
|
||||
* Each graph in the segment returns <=k neighbors.
|
||||
* The coordinator node picks the final `size` number of neighbors from the neighbors returned by each shard.
|
||||
|
||||
This topic also provides recommendations for comparing approximate k-NN to exact k-NN with score script.
|
||||
|
||||
## Indexing performance tuning
|
||||
|
||||
Take the following steps to improve indexing performance, especially when you plan to index a large number of vectors at once:
|
||||
|
||||
* **Disable the refresh interval**
|
||||
|
||||
Either disable the refresh interval (default = 1 sec), or set a long duration for the refresh interval to avoid creating multiple small segments:
|
||||
|
||||
```json
|
||||
PUT /<index_name>/_settings
|
||||
{
|
||||
"index" : {
|
||||
"refresh_interval" : "-1"
|
||||
}
|
||||
}
|
||||
```
|
||||
**Note**: Make sure to reenable `refresh_interval` after indexing finishes.
|
||||
|
||||
* **Disable replicas (no OpenSearch replica shard)**
|
||||
|
||||
Set replicas to `0` to prevent duplicate construction of graphs in both primary and replica shards. When you enable replicas after indexing finishes, the serialized graphs are directly copied. If you have no replicas, losing nodes might cause data loss, so it's important that the data lives elsewhere so this initial load can be retried in case of an issue.
|
||||
|
||||
* **Increase the number of indexing threads**
|
||||
|
||||
If the hardware you choose has multiple cores, you can speed up the indexing process by allowing multiple threads in graph construction. Determine the number of threads to allot with the [knn.algo_param.index_thread_qty](../settings/#cluster-settings) setting.
|
||||
|
||||
Keep an eye on CPU utilization and choose the correct number of threads. Because graph construction is costly, having multiple threads can cause additional CPU load.
|
||||
|
||||
## Search performance tuning
|
||||
|
||||
Take the following steps to improve search performance:
|
||||
|
||||
* **Reduce segment count**
|
||||
|
||||
To improve search performance, you must keep the number of segments under control. Lucene's IndexSearcher searches over all of the segments in a shard to find the 'size' best results. However, because the complexity of search for the HNSW algorithm is logarithmic with respect to the number of vectors, searching over five graphs with 100 vectors each and then taking the top 'size' results from 5*k results will take longer than searching over one graph with 500 vectors and then taking the top 'size' results from k results.
|
||||
|
||||
Ideally, having one segment per shard provides the optimal performance with respect to search latency. You can configure an index to have multiple shards to avoid giant shards and achieve more parallelism.
|
||||
|
||||
You can control the number of segments by choosing a larger refresh interval, or during indexing by asking OpenSearch to slow down segment creation by disabling the refresh interval.
|
||||
|
||||
* **Warm up the index**
|
||||
|
||||
Graphs are constructed during indexing, but they're loaded into memory during the first search. In Lucene, each segment is searched sequentially (so, for k-NN, each segment returns up to k nearest neighbors of the query point), and the top 'size' number of results based on the score are returned from all the results returned by segments at a shard level (higher score = better result).
|
||||
|
||||
Once a graph is loaded (graphs are loaded outside OpenSearch JVM), OpenSearch caches them in memory. Initial queries are expensive and take a few seconds, while subsequent queries are faster and take milliseconds (assuming the k-NN circuit breaker isn't hit).
|
||||
|
||||
To avoid this latency penalty during your first queries, you can use the warmup API operation on the indices you want to search:
|
||||
|
||||
```json
|
||||
GET /_plugins/_knn/warmup/index1,index2,index3?pretty
|
||||
{
|
||||
"_shards" : {
|
||||
"total" : 6,
|
||||
"successful" : 6,
|
||||
"failed" : 0
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
The warmup API operation loads all graphs for all shards (primary and replica) for the specified indices into the cache, so there's no penalty to load graphs during initial searches.
|
||||
|
||||
**Note**: This API operation only loads the segments of the indices it ***sees*** into the cache. If a merge or refresh operation finishes after the API runs, or if you add new documents, you need to rerun the API to load those graphs into memory.
|
||||
|
||||
* **Avoid reading stored fields**
|
||||
|
||||
If your use case is simply to read the IDs and scores of the nearest neighbors, you can disable reading stored fields, which saves time retrieving the vectors from stored fields.
|
||||
|
||||
## Improving recall
|
||||
|
||||
Recall depends on multiple factors like number of vectors, number of dimensions, segments, and so on. Searching over a large number of small segments and aggregating the results leads to better recall than searching over a small number of large segments and aggregating results. The larger the graph, the more chances of losing recall if you're using smaller algorithm parameters. Choosing larger values for algorithm parameters should help solve this issue but sacrifices search latency and indexing time. That being said, it's important to understand your system's requirements for latency and accuracy, and then choose the number of segments you want your index to have based on experimentation.
|
||||
|
||||
To configure recall, adjust the algorithm parameters of the HNSW algorithm exposed through index settings. Algorithm parameters that control recall are `m`, `ef_construction`, and `ef_search`. For more information about how algorithm parameters influence indexing and search recall, see [HNSW algorithm parameters](https://github.com/nmslib/hnswlib/blob/master/ALGO_PARAMS.md). Increasing these values can help recall and lead to better search results, but at the cost of higher memory utilization and increased indexing time.
|
||||
|
||||
The default recall values work on a broader set of use cases, but make sure to run your own experiments on your data sets and choose the appropriate values. For index-level settings, see [Index settings](../settings#index-settings).
|
||||
|
||||
## Estimating memory usage
|
||||
|
||||
In a typical OpenSearch cluster, a certain portion of RAM is set aside for the JVM heap. The k-NN plugin allocates graphs to a portion of the remaining RAM. This portion's size is determined by the `knn.memory.circuit_breaker.limit` cluster setting. By default, the limit is set at 50%.
|
||||
|
||||
The memory required for graphs is estimated to be `1.1 * (4 * dimension + 8 * M)` bytes/vector.
|
||||
|
||||
As an example, assume you have a million vectors with a dimension of 256 and M of 16. The memory requirement can be estimated as follows:
|
||||
|
||||
```
|
||||
1.1 * (4 * 256 + 8 * 16) * 1,000,000 ~= 1.26 GB
|
||||
```
|
||||
|
||||
**Note**: Remember that having a replica doubles the total number of vectors.
|
||||
|
||||
## Approximate nearest neighbor versus score script
|
||||
|
||||
The standard k-NN query and custom scoring option perform differently. Test with a representative set of documents to see if the search results and latencies match your expectations.
|
||||
|
||||
Custom scoring works best if the initial filter reduces the number of documents to no more than 20,000. Increasing shard count can improve latency, but be sure to keep shard size within the [recommended guidelines](../../opensearch/#primary-and-replica-shards).
|
|
@ -0,0 +1,36 @@
|
|||
---
|
||||
layout: default
|
||||
title: Settings
|
||||
parent: k-NN
|
||||
nav_order: 6
|
||||
---
|
||||
|
||||
# k-NN settings
|
||||
|
||||
The k-NN plugin adds several new index and cluster settings.
|
||||
|
||||
|
||||
## Index settings
|
||||
|
||||
The default values work well for most use cases, but you can change these settings when you create the index.
|
||||
|
||||
Setting | Default | Description
|
||||
:--- | :--- | :---
|
||||
`index.knn.algo_param.ef_search` | 512 | The size of the dynamic list used during k-NN searches. Higher values lead to more accurate but slower searches.
|
||||
`index.knn.algo_param.ef_construction` | 512 | The size of the dynamic list used during k-NN graph creation. Higher values lead to a more accurate graph, but slower indexing speed.
|
||||
`index.knn.algo_param.m` | 16 | The number of bidirectional links that the plugin creates for each new element. Increasing and decreasing this value can have a large impact on memory consumption. Keep this value between 2 and 100.
|
||||
`index.knn.space_type` | "l2" | The vector space used to calculate the distance between vectors. Currently, the k-NN plugin supports the `l2` space (Euclidean distance) and `cosinesimil` space (cosine similarity). For more information on these spaces, see the [nmslib documentation](https://github.com/nmslib/nmslib/blob/master/manual/spaces.md).
|
||||
|
||||
|
||||
## Cluster settings
|
||||
|
||||
Setting | Default | Description
|
||||
:--- | :--- | :---
|
||||
`knn.algo_param.index_thread_qty` | 1 | The number of threads used for graph creation. Keeping this value low reduces the CPU impact of the k-NN plugin, but also reduces indexing performance.
|
||||
`knn.cache.item.expiry.enabled` | false | Whether to remove graphs that have not been accessed for a certain duration from memory.
|
||||
`knn.cache.item.expiry.minutes` | 3h | If enabled, the idle time before removing a graph from memory.
|
||||
`knn.circuit_breaker.unset.percentage` | 75.0 | The native memory usage threshold for the circuit breaker. Memory usage must be below this percentage of `knn.memory.circuit_breaker.limit` for `knn.circuit_breaker.triggered` to remain false.
|
||||
`knn.circuit_breaker.triggered` | false | True when memory usage exceeds the `knn.circuit_breaker.unset.percentage` value.
|
||||
`knn.memory.circuit_breaker.limit` | 50% | The native memory limit for graphs. At the default value, if a machine has 100 GB of memory and the JVM uses 32 GB, the k-NN plugin uses 50% of the remaining 68 GB (34 GB). If memory usage exceeds this value, k-NN removes the least recently used graphs.
|
||||
`knn.memory.circuit_breaker.enabled` | true | Whether to enable the k-NN memory circuit breaker.
|
||||
`knn.plugin.enabled`| true | Enables or disables the k-NN plugin.
|
|
@ -0,0 +1,196 @@
|
|||
---
|
||||
layout: default
|
||||
title: API
|
||||
parent: Performance Analyzer
|
||||
nav_order: 1
|
||||
---
|
||||
|
||||
# Performance Analyzer API
|
||||
|
||||
Performance Analyzer uses a single HTTP method and URI for most requests:
|
||||
|
||||
```
|
||||
GET <endpoint>:9600/_opensearch/_performanceanalyzer/metrics
|
||||
```
|
||||
|
||||
Note the use of port 9600. Provide parameters for metrics, aggregations, dimensions, and nodes (optional):
|
||||
|
||||
```
|
||||
?metrics=<metrics>&agg=<aggregations>&dim=<dimensions>&nodes=all
|
||||
```
|
||||
|
||||
For a full list of metrics, see [Metrics reference](../reference/). Performance Analyzer updates its data every five seconds. If you create a custom client, we recommend using that same interval for calls to the API.
|
||||
|
||||
|
||||
#### Sample request
|
||||
|
||||
```
|
||||
GET localhost:9600/_opensearch/_performanceanalyzer/metrics?metrics=Latency,CPU_Utilization&agg=avg,max&dim=ShardID&nodes=all
|
||||
```
|
||||
|
||||
|
||||
#### Sample response
|
||||
|
||||
```json
|
||||
{
|
||||
"keHlhQbbTpm1BYicficEQg": {
|
||||
"timestamp": 1554940530000,
|
||||
"data": {
|
||||
"fields": [{
|
||||
"name": "ShardID",
|
||||
"type": "VARCHAR"
|
||||
},
|
||||
{
|
||||
"name": "Latency",
|
||||
"type": "DOUBLE"
|
||||
},
|
||||
{
|
||||
"name": "CPU_Utilization",
|
||||
"type": "DOUBLE"
|
||||
}
|
||||
],
|
||||
"records": [
|
||||
[
|
||||
null,
|
||||
null,
|
||||
0.012552206029147535
|
||||
],
|
||||
[
|
||||
"1",
|
||||
4.8,
|
||||
0.0009780939762972104
|
||||
]
|
||||
]
|
||||
}
|
||||
},
|
||||
"bHdpbMJZTs-TKtZro2SmYA": {
|
||||
"timestamp": 1554940530000,
|
||||
"data": {
|
||||
"fields": [{
|
||||
"name": "ShardID",
|
||||
"type": "VARCHAR"
|
||||
},
|
||||
{
|
||||
"name": "Latency",
|
||||
"type": "DOUBLE"
|
||||
},
|
||||
{
|
||||
"name": "CPU_Utilization",
|
||||
"type": "DOUBLE"
|
||||
}
|
||||
],
|
||||
"records": [
|
||||
[
|
||||
null,
|
||||
18.2,
|
||||
0.011966493817311527
|
||||
],
|
||||
[
|
||||
"1",
|
||||
14.8,
|
||||
0.0007670829370071493
|
||||
]
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
In this case, each top-level object represents a node. The API returns names and data types for the metrics and dimensions that you specified, along with values from five seconds ago and current values (if different). Null values represent inactivity during that time period.
|
||||
|
||||
Performance Analyzer has one additional URI that returns the unit for each metric.
|
||||
|
||||
|
||||
#### Sample request
|
||||
|
||||
```
|
||||
GET localhost:9600/_opensearch/_performanceanalyzer/metrics/units
|
||||
```
|
||||
|
||||
|
||||
#### Sample response
|
||||
|
||||
```json
|
||||
{
|
||||
"Disk_Utilization": "%",
|
||||
"Cache_Request_Hit": "count",
|
||||
"TermVectors_Memory": "B",
|
||||
"Segments_Memory": "B",
|
||||
"HTTP_RequestDocs": "count",
|
||||
"Net_TCP_Lost": "segments/flow",
|
||||
"Refresh_Time": "ms",
|
||||
"GC_Collection_Event": "count",
|
||||
"Merge_Time": "ms",
|
||||
"Sched_CtxRate": "count/s",
|
||||
"Cache_Request_Size": "B",
|
||||
"ThreadPool_QueueSize": "count",
|
||||
"Sched_Runtime": "s/ctxswitch",
|
||||
"Disk_ServiceRate": "MB/s",
|
||||
"Heap_AllocRate": "B/s",
|
||||
"Heap_Max": "B",
|
||||
"Sched_Waittime": "s/ctxswitch",
|
||||
"ShardBulkDocs": "count",
|
||||
"Thread_Blocked_Time": "s/event",
|
||||
"VersionMap_Memory": "B",
|
||||
"Master_Task_Queue_Time": "ms",
|
||||
"Merge_CurrentEvent": "count",
|
||||
"Indexing_Buffer": "B",
|
||||
"Bitset_Memory": "B",
|
||||
"Norms_Memory": "B",
|
||||
"Net_PacketDropRate4": "packets/s",
|
||||
"Heap_Committed": "B",
|
||||
"Net_PacketDropRate6": "packets/s",
|
||||
"Thread_Blocked_Event": "count",
|
||||
"GC_Collection_Time": "ms",
|
||||
"Cache_Query_Miss": "count",
|
||||
"IO_TotThroughput": "B/s",
|
||||
"Latency": "ms",
|
||||
"Net_PacketRate6": "packets/s",
|
||||
"Cache_Query_Hit": "count",
|
||||
"IO_ReadSyscallRate": "count/s",
|
||||
"Net_PacketRate4": "packets/s",
|
||||
"Cache_Request_Miss": "count",
|
||||
"CB_ConfiguredSize": "B",
|
||||
"CB_TrippedEvents": "count",
|
||||
"ThreadPool_RejectedReqs": "count",
|
||||
"Disk_WaitTime": "ms",
|
||||
"Net_TCP_TxQ": "segments/flow",
|
||||
"Master_Task_Run_Time": "ms",
|
||||
"IO_WriteSyscallRate": "count/s",
|
||||
"IO_WriteThroughput": "B/s",
|
||||
"Flush_Event": "count",
|
||||
"Net_TCP_RxQ": "segments/flow",
|
||||
"Refresh_Event": "count",
|
||||
"Points_Memory": "B",
|
||||
"Flush_Time": "ms",
|
||||
"Heap_Init": "B",
|
||||
"CPU_Utilization": "cores",
|
||||
"HTTP_TotalRequests": "count",
|
||||
"ThreadPool_ActiveThreads": "count",
|
||||
"Cache_Query_Size": "B",
|
||||
"Paging_MinfltRate": "count/s",
|
||||
"Merge_Event": "count",
|
||||
"Net_TCP_SendCWND": "B/flow",
|
||||
"Cache_Request_Eviction": "count",
|
||||
"Segments_Total": "count",
|
||||
"Terms_Memory": "B",
|
||||
"DocValues_Memory": "B",
|
||||
"Heap_Used": "B",
|
||||
"Cache_FieldData_Eviction": "count",
|
||||
"IO_TotalSyscallRate": "count/s",
|
||||
"CB_EstimatedSize": "B",
|
||||
"Net_Throughput": "B/s",
|
||||
"Paging_RSS": "pages",
|
||||
"Indexing_ThrottleTime": "ms",
|
||||
"StoredFields_Memory": "B",
|
||||
"IndexWriter_Memory": "B",
|
||||
"Master_PendingQueueSize": "count",
|
||||
"Net_TCP_SSThresh": "B/flow",
|
||||
"Cache_FieldData_Size": "B",
|
||||
"Paging_MajfltRate": "count/s",
|
||||
"ThreadPool_TotalThreads": "count",
|
||||
"IO_ReadThroughput": "B/s",
|
||||
"ShardEvents": "count",
|
||||
"Net_TCP_NumFlows": "count"
|
||||
}
|
||||
```
|
|
@ -0,0 +1,162 @@
|
|||
---
|
||||
layout: default
|
||||
title: Create Dashboards
|
||||
parent: Performance Analyzer
|
||||
nav_order: 2
|
||||
---
|
||||
|
||||
# PerfTop dashboards
|
||||
|
||||
Dashboards are defined in JSON and composed of three main elements: tables, line graphs, and bar graphs. You define a grid of rows and columns and then place elements within that grid, with each element spanning as many rows and columns as you specify.
|
||||
|
||||
The best way to get started with building custom dashboards is to duplicate and modify one of the existing JSON files in the `dashboards` directory.
|
||||
{: .tip }
|
||||
|
||||
---
|
||||
|
||||
#### Table of contents
|
||||
1. TOC
|
||||
{:toc}
|
||||
|
||||
---
|
||||
|
||||
|
||||
## Summary of elements
|
||||
|
||||
- Tables show metrics per dimension. For example, if your metric is `CPU_Utilization` and your dimension `ShardID`, a PerfTop table shows a row for each shard on each node.
|
||||
- Bar graphs are aggregated for the cluster, unless you add `nodeName` to the dashboard. See the [options for all elements](#all-elements).
|
||||
- Line graphs are aggregated for each node. Each line represents a node.
|
||||
|
||||
|
||||
## Position elements
|
||||
|
||||
PerfTop positions elements within a grid. For example, consider this 12 * 12 grid.
|
||||
|
||||
![Dashboard grid](../../images/perftop-grid.png)
|
||||
|
||||
The upper-left of the grid represents row 0, column 0, so the starting positions for the three boxes are:
|
||||
|
||||
- Orange: row 0, column 0
|
||||
- Purple: row 2, column 2
|
||||
- Green: row 1, column 6
|
||||
|
||||
These boxes span a number of rows and columns. In this case:
|
||||
|
||||
- Orange: 2 rows, 4 columns
|
||||
- Purple: 1 row, 4 columns
|
||||
- Green: 3 rows, 2 columns
|
||||
|
||||
In JSON form, we have the following:
|
||||
|
||||
```json
|
||||
{
|
||||
"gridOptions": {
|
||||
"rows": 12,
|
||||
"cols": 12
|
||||
},
|
||||
"graphs": {
|
||||
"tables": [{
|
||||
"options": {
|
||||
"gridPosition": {
|
||||
"row": 0,
|
||||
"col": 0,
|
||||
"rowSpan": 2,
|
||||
"colSpan": 4
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"options": {
|
||||
"gridPosition": {
|
||||
"row": 2,
|
||||
"col": 2,
|
||||
"rowSpan": 1,
|
||||
"colSpan": 4
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"options": {
|
||||
"gridPosition": {
|
||||
"row": 1,
|
||||
"col": 6,
|
||||
"rowSpan": 3,
|
||||
"colSpan": 2
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
At this point, however, all the JSON does is define the size and position of three tables. To fill elements with data, you specify a query.
|
||||
|
||||
|
||||
## Add queries
|
||||
|
||||
Queries use the same elements as the [REST API](../api/), just in JSON form:
|
||||
|
||||
```json
|
||||
{
|
||||
"queryParams": {
|
||||
"metrics": "estimated,limitConfigured",
|
||||
"aggregates": "avg,avg",
|
||||
"dimensions": "type",
|
||||
"sortBy": "estimated"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
For details on available metrics, see [Metrics reference](../reference/).
|
||||
|
||||
|
||||
## Add options
|
||||
|
||||
Options include labels, colors, and a refresh interval. Different element types have different options.
|
||||
|
||||
Dashboards support the 16 ANSI colors: black, red, green, yellow, blue, magenta, cyan, and white, plus a "bright" variant of each. For the "bright" variants of these colors, use the numbers 8--15. If your terminal supports 256 colors, you can also use hex codes (e.g. `#6D40ED`).
|
||||
{: .note }
|
||||
|
||||
|
||||
### All elements
|
||||
|
||||
Option | Type | Description
|
||||
:--- | :--- | :---
|
||||
`label` | String or integer | The text in the upper-left corner of the box.
|
||||
`labelColor` | String or integer | The color of the label.
|
||||
`refreshInterval` | Integer | The number of milliseconds between calls to the Performance Analyzer API for new data. Minimum value is 5000.
|
||||
`dimensionFilters` | String array | The dimension value to display for the graph. For example, if you query for `metric=Net_Throughput&agg=sum&dim=Direction` and the possible dimension values are `in` and `out`, you can define `dimensionFilters: ["in"]` to only display the metric data for the `in` dimension.
|
||||
`nodeName` | String | If non-null, lets you restrict elements to individual nodes. You can specify the node name directly in the dashboard file, but the better approach is to use `"nodeName": "#nodeName"` in the dashboard and include the `--nodename <node_name>` argument when starting PerfTop.
|
||||
|
||||
|
||||
### Tables
|
||||
|
||||
Option | Type | Description
|
||||
:--- | :--- | :---
|
||||
`bg` | String or integer | The background color.
|
||||
`fg` | String or integer | The text color.
|
||||
`selectedFg` | String or integer | The text color for focused text.
|
||||
`selectedBg` | String or integer | The background color for focused text.
|
||||
`columnSpacing` | Integer | The amount of space (measured in characters) between columns.
|
||||
`keys` | Boolean | Has no impact at this time.
|
||||
|
||||
|
||||
### Bars
|
||||
|
||||
Option | Type | Description
|
||||
:--- | :--- | :---
|
||||
`barWidth` | Integer | The width of each bar (measured in characters) in the graph.
|
||||
`xOffset` | Integer | The amount of space (measured in characters) between the y-axis and the first bar in the graph.
|
||||
`maxHeight` | Integer | The maximum height of each bar (measured in characters) in the graph.
|
||||
|
||||
|
||||
### Lines
|
||||
|
||||
Option | Type | Description
|
||||
:--- | :--- | :---
|
||||
`showNthLabel` | Integer | Which of the `xAxis` labels to show. For example, `"showNthLabel": 2` shows every other label.
|
||||
`showLegend` | Boolean | Whether or not to display a legend for the line graph.
|
||||
`legend.width` | Integer | The width of the legend (measured in characters) in the graph.
|
||||
`xAxis` | String array | Array of labels for the x-axis. For example, `["0:00", "0:10", "0:20", "0:30", "0:40", "0:50"]`.
|
||||
`colors` | String array | Array of line colors to choose from. For example, `["magenta", "cyan"]`. If you don't provide this value, PerfTop chooses random colors for each line.
|
|
@ -0,0 +1,101 @@
|
|||
---
|
||||
layout: default
|
||||
title: Performance Analyzer
|
||||
nav_order: 58
|
||||
has_children: true
|
||||
---
|
||||
|
||||
# Performance Analyzer
|
||||
|
||||
Performance Analyzer is an agent and REST API that allows you to query numerous performance metrics for your cluster, including aggregations of those metrics, independent of the Java Virtual Machine (JVM). PerfTop is the default command line interface (CLI) for displaying those metrics.
|
||||
|
||||
To download PerfTop, see [Download](https://opensearch.org/downloads.html) on the OpenSearch website.
|
||||
|
||||
You can also install it using [npm](https://www.npmjs.com/):
|
||||
|
||||
```bash
|
||||
npm install -g @aws/opensearch-perftop
|
||||
```
|
||||
|
||||
![PerfTop screenshot](../images/perftop.png)
|
||||
|
||||
|
||||
## Get started with PerfTop
|
||||
|
||||
The basic syntax is:
|
||||
|
||||
```bash
|
||||
./perf-top-<operating_system> --dashboard <dashboard>.json --endpoint <endpoint>
|
||||
```
|
||||
|
||||
If you're using npm, the syntax is similar:
|
||||
|
||||
```bash
|
||||
perf-top --dashboard <dashboard> --endpoint <endpoint>
|
||||
```
|
||||
|
||||
If you're running PerfTop from a node (i.e. locally), specify port 9600:
|
||||
|
||||
```bash
|
||||
./perf-top-linux --dashboard dashboards/<dashboard>.json --endpoint localhost:9600
|
||||
```
|
||||
|
||||
Otherwise, just specify the OpenSearch endpoint:
|
||||
|
||||
```bash
|
||||
./perf-top-macos --dashboard dashboards/<dashboard>.json --endpoint my-cluster.my-domain.com
|
||||
```
|
||||
|
||||
PerfTop has four pre-built dashboards in the `dashboards` directory, but you can also [create your own](dashboards/).
|
||||
|
||||
You can also load the pre-built dashboards (ClusterOverview, ClusterNetworkMemoryAnalysis, ClusterThreadAnalysis, or NodeAnalysis) without the JSON files, such as `--dashboard ClusterThreadAnalysis`.
|
||||
|
||||
PerfTop has no interactivity. Start the application, monitor the dashboard, and press esc, q, or Ctrl + C to quit.
|
||||
{: .note }
|
||||
|
||||
|
||||
### Other options
|
||||
|
||||
- For NodeAnalysis and similar custom dashboards, you can add the `--nodename <node_name>` argument if you want your dashboard to display metrics for only a single node.
|
||||
- For troubleshooting, add the `--logfile <log-file>.txt` argument.
|
||||
|
||||
|
||||
## Performance Analyzer configuration
|
||||
|
||||
### Storage
|
||||
|
||||
Performance Analyzer uses `/dev/shm` for temporary storage. During heavy workloads on a cluster, Performance Analyzer can use up to 1 GB of space.
|
||||
|
||||
Docker, however, has a default `/dev/shm` size of 64 MB. To change this value, you can use the `docker run --shm-size 1gb` flag or [a similar setting in Docker Compose](https://docs.docker.com/compose/compose-file/#shm_size).
|
||||
|
||||
If you're not using Docker, check the size of `/dev/shm` using `df -h`. The default value is probably plenty, but if you need to change its size, add the following line to `/etc/fstab`:
|
||||
|
||||
```bash
|
||||
tmpfs /dev/shm tmpfs defaults,noexec,nosuid,size=1G 0 0
|
||||
```
|
||||
|
||||
Then remount the file system:
|
||||
|
||||
```bash
|
||||
mount -o remount /dev/shm
|
||||
```
|
||||
|
||||
|
||||
### Security
|
||||
|
||||
Performance Analyzer supports encryption in transit for requests. It currently does *not* support client or server authentication for requests. To enable encryption in transit, edit `performance-analyzer.properties` in your `$ES_HOME` directory:
|
||||
|
||||
```bash
|
||||
vi $ES_HOME/plugins/opensearch_performance_analyzer/pa_config/performance-analyzer.properties
|
||||
```
|
||||
|
||||
Change the following lines to configure encryption in transit. Note that `certificate-file-path` must be a certificate for the server, not a root CA:
|
||||
|
||||
```
|
||||
https-enabled = true
|
||||
|
||||
#Setup the correct path for certificates
|
||||
certificate-file-path = specify_path
|
||||
|
||||
private-key-file-path = specify_path
|
||||
```
|
|
@ -0,0 +1,63 @@
|
|||
---
|
||||
layout: default
|
||||
title: API
|
||||
parent: Root Cause Analysis
|
||||
grand_parent: Performance Analyzer
|
||||
nav_order: 1
|
||||
---
|
||||
|
||||
# RCA API
|
||||
|
||||
## Sample request
|
||||
|
||||
```
|
||||
# Request all available RCAs
|
||||
GET localhost:9600/_opensearch/_performanceanalyzer/rca
|
||||
|
||||
# Request a specific RCA
|
||||
GET localhost:9600/_opensearch/_performanceanalyzer/rca?name=HighHeapUsageClusterRca
|
||||
```
|
||||
|
||||
|
||||
## Sample response
|
||||
|
||||
```json
|
||||
{
|
||||
"HighHeapUsageClusterRca": [{
|
||||
"rca_name": "HighHeapUsageClusterRca",
|
||||
"state": "unhealthy",
|
||||
"timestamp": 1587426650942,
|
||||
"HotClusterSummary": [{
|
||||
"number_of_nodes": 2,
|
||||
"number_of_unhealthy_nodes": 1,
|
||||
"HotNodeSummary": [{
|
||||
"host_address": "192.168.144.2",
|
||||
"node_id": "JtlEoRowSI6iNpzpjlbp_Q",
|
||||
"HotResourceSummary": [{
|
||||
"resource_type": "old gen",
|
||||
"threshold": 0.65,
|
||||
"value": 0.81827232588145373,
|
||||
"avg": NaN,
|
||||
"max": NaN,
|
||||
"min": NaN,
|
||||
"unit_type": "heap usage in percentage",
|
||||
"time_period_seconds": 600,
|
||||
"TopConsumerSummary": [{
|
||||
"name": "CACHE_FIELDDATA_SIZE",
|
||||
"value": 590702564
|
||||
},
|
||||
{
|
||||
"name": "CACHE_REQUEST_SIZE",
|
||||
"value": 28375
|
||||
},
|
||||
{
|
||||
"name": "CACHE_QUERY_SIZE",
|
||||
"value": 12687
|
||||
}
|
||||
]
|
||||
}]
|
||||
}]
|
||||
}]
|
||||
}]
|
||||
}
|
||||
```
|
|
@ -0,0 +1,17 @@
|
|||
---
|
||||
layout: default
|
||||
title: Root Cause Analysis
|
||||
nav_order: 50
|
||||
parent: Performance Analyzer
|
||||
has_children: true
|
||||
---
|
||||
|
||||
# Root Cause Analysis
|
||||
|
||||
The OpenSearch Performance Analyzer plugin (PA) captures OpenSearch and JVM activity, plus their lower-level resource usage (e.g. disk, network, CPU, and memory). Based on this instrumentation, Performance Analyzer computes and exposes diagnostic metrics so that administrators can measure and understand the bottlenecks in their OpenSearch clusters.
|
||||
|
||||
The Root Cause Analysis framework (RCA) uses the information from PA to alert administrators about the root cause of performance and availability issues that their clusters might be experiencing.
|
||||
|
||||
In broad strokes, the framework helps you access data streams from OpenSearch nodes running Performance Analyzer. You write snippets of Java to choose the streams that matter to you and evaluate the streams' PA metrics against certain thresholds. As RCA runs, you can access the state of each analysis using the REST API.
|
||||
|
||||
To learn more about Root Cause Analysis, see [its repository on GitHub](https://github.com/opensearch-project/performance-analyzer-rca).
|
|
@ -0,0 +1,11 @@
|
|||
---
|
||||
layout: default
|
||||
title: RCA Reference
|
||||
parent: Root Cause Analysis
|
||||
grand_parent: Performance Analyzer
|
||||
nav_order: 3
|
||||
---
|
||||
|
||||
# RCA reference
|
||||
|
||||
You can find a reference of available RCAs and their purposes on [GitHub](https://github.com/opensearch-project/performance-analyzer-rca/tree/master/docs).
|
|
@ -0,0 +1,560 @@
|
|||
---
|
||||
layout: default
|
||||
title: Metrics Reference
|
||||
parent: Performance Analyzer
|
||||
nav_order: 3
|
||||
---
|
||||
|
||||
# Metrics reference
|
||||
|
||||
This page contains all Performance Analyzer metrics. All metrics support the `avg`, `sum`, `min`, and `max` aggregations, although certain metrics measure only one thing, making the choice of aggregation irrelevant.
|
||||
|
||||
For information on dimensions, see the [dimensions reference](#dimensions-reference).
|
||||
|
||||
This list is extensive. We recommend using Ctrl/Cmd + F to find what you're looking for.
|
||||
{: .tip }
|
||||
|
||||
<table>
|
||||
<thead style="text-align: left">
|
||||
<tr>
|
||||
<th>Metric</th>
|
||||
<th>Dimensions</th>
|
||||
<th>Description</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr>
|
||||
<td>CPU_Utilization
|
||||
</td>
|
||||
<td rowspan="18">ShardID, IndexName, Operation, ShardRole
|
||||
</td>
|
||||
<td>CPU usage ratio. CPU time (in milliseconds) used by the associated thread(s) in the past five seconds, divided by 5000 milliseconds.
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Paging_MajfltRate
|
||||
</td>
|
||||
<td>The number of major faults per second in the past five seconds. A major fault requires the process to load a memory page from disk.
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Paging_MinfltRate
|
||||
</td>
|
||||
<td>The number of minor faults per second in the past five seconds. A minor fault does not require the process to load a memory page from disk.
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Paging_RSS
|
||||
</td>
|
||||
<td>The number of pages the process has in real memory---the pages that count towards text, data, or stack space. This number does not include pages that have not been demand-loaded in or swapped out.
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Sched_Runtime
|
||||
</td>
|
||||
<td>Time (seconds) spent executing on the CPU per context switch.
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Sched_Waittime
|
||||
</td>
|
||||
<td>Time (seconds) spent waiting on a run queue per context switch.
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Sched_CtxRate
|
||||
</td>
|
||||
<td>Number of times run on the CPU per second in the past five seconds.
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Heap_AllocRate
|
||||
</td>
|
||||
<td>An approximation of the heap memory allocated, in bytes, per second in the past five seconds.
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>IO_ReadThroughput
|
||||
</td>
|
||||
<td>Number of bytes read per second in the last five seconds.
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>IO_WriteThroughput
|
||||
</td>
|
||||
<td>Number of bytes written per second in the last five seconds.
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>IO_TotThroughput
|
||||
</td>
|
||||
<td>Number of bytes read or written per second in the last five seconds.
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>IO_ReadSyscallRate
|
||||
</td>
|
||||
<td>Read system calls per second in the last five seconds.
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>IO_WriteSyscallRate
|
||||
</td>
|
||||
<td>Write system calls per second in the last five seconds.
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>IO_TotalSyscallRate
|
||||
</td>
|
||||
<td>Read and write system calls per second in the last five seconds.
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Thread_Blocked_Time
|
||||
</td>
|
||||
<td>Average time (seconds) that the associated thread(s) blocked to enter or reenter a monitor.
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Thread_Blocked_Event
|
||||
</td>
|
||||
<td>The total number of times that the associated thread(s) blocked to enter or reenter a monitor (i.e. the number of times a thread has been in the blocked state).
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>ShardEvents
|
||||
</td>
|
||||
<td>The total number of events executed on a shard in the past five seconds.
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>ShardBulkDocs
|
||||
</td>
|
||||
<td>The total number of documents indexed in the past five seconds.
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Indexing_ThrottleTime
|
||||
</td>
|
||||
<td rowspan="30">ShardID, IndexName
|
||||
</td>
|
||||
<td>Time (milliseconds) that the index has been under merge throttling control in the past five seconds.
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Cache_Query_Hit
|
||||
</td>
|
||||
<td>The number of successful lookups in the query cache in the past five seconds.
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Cache_Query_Miss
|
||||
</td>
|
||||
<td>The number of lookups in the query cache that failed to retrieve a `DocIdSet` in the past five seconds. `DocIdSet` is a set of document IDs in Lucene.
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Cache_Query_Size
|
||||
</td>
|
||||
<td>Query cache memory size in bytes.
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Cache_FieldData_Eviction
|
||||
</td>
|
||||
<td>The number of times OpenSearch has evicted data from the fielddata heap space (occurs when the heap space is full) in the past five seconds.
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Cache_FieldData_Size
|
||||
</td>
|
||||
<td>Fielddata memory size in bytes.
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Cache_Request_Hit
|
||||
</td>
|
||||
<td>The number of successful lookups in the shard request cache in the past five seconds.
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Cache_Request_Miss
|
||||
</td>
|
||||
<td>The number of lookups in the request cache that failed to retrieve the results of search requests in the past five seconds.
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Cache_Request_Eviction
|
||||
</td>
|
||||
<td>The number of times OpenSearch evicts data from shard request cache (occurs when the request cache is full) in the past five seconds.
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Cache_Request_Size
|
||||
</td>
|
||||
<td>Shard request cache memory size in bytes.
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Refresh_Event
|
||||
</td>
|
||||
<td>The total number of refreshes executed in the past five seconds.
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Refresh_Time
|
||||
</td>
|
||||
<td>The total time (milliseconds) spent executing refreshes in the past five seconds.
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Flush_Event
|
||||
</td>
|
||||
<td>The total number of flushes executed in the past five seconds.
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Flush_Time
|
||||
</td>
|
||||
<td>The total time (milliseconds) spent executing flushes in the past five seconds.
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Merge_Event
|
||||
</td>
|
||||
<td>The total number of merges executed in the past five seconds.
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Merge_Time
|
||||
</td>
|
||||
<td>The total time (milliseconds) spent executing merges in the past five seconds.
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Merge_CurrentEvent
|
||||
</td>
|
||||
<td>The current number of merges executing.
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Indexing_Buffer
|
||||
</td>
|
||||
<td>Index buffer memory size in bytes.
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Segments_Total
|
||||
</td>
|
||||
<td>The number of segments.
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Segments_Memory
|
||||
</td>
|
||||
<td>Estimated memory usage of segments in bytes.
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Terms_Memory
|
||||
</td>
|
||||
<td>Estimated memory usage of terms dictionaries in bytes.
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>StoredFields_Memory
|
||||
</td>
|
||||
<td>Estimated memory usage of stored fields in bytes.
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>TermVectors_Memory
|
||||
</td>
|
||||
<td>Estimated memory usage of term vectors in bytes.
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Norms_Memory
|
||||
</td>
|
||||
<td>Estimated memory usage of norms (normalization factors) in bytes.
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Points_Memory
|
||||
</td>
|
||||
<td>Estimated memory usage of points in bytes.
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>DocValues_Memory
|
||||
</td>
|
||||
<td>Estimated memory usage of doc values in bytes.
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>IndexWriter_Memory
|
||||
</td>
|
||||
<td>Estimated memory usage by the index writer in bytes.
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Bitset_Memory
|
||||
</td>
|
||||
<td>Estimated memory usage for the cached bit sets in bytes.
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>VersionMap_Memory
|
||||
</td>
|
||||
<td>Estimated memory usage of the version map in bytes.
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Shard_Size_In_Bytes
|
||||
</td>
|
||||
<td>Estimated disk usage of the shard in bytes.
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Latency
|
||||
</td>
|
||||
<td>Operation, Exception, Indices, HTTPRespCode, ShardID, IndexName, ShardRole
|
||||
</td>
|
||||
<td>Latency (milliseconds) of a request.
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>GC_Collection_Event
|
||||
</td>
|
||||
<td rowspan="6">MemType
|
||||
</td>
|
||||
<td>The number of garbage collections that have occurred in the past five seconds.
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>GC_Collection_Time
|
||||
</td>
|
||||
<td>The approximate accumulated time (milliseconds) of all garbage collections that have occurred in the past five seconds.
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Heap_Committed
|
||||
</td>
|
||||
<td>The amount of memory (bytes) that is committed for the JVM to use.
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Heap_Init
|
||||
</td>
|
||||
<td>The amount of memory (bytes) that the JVM initially requests from the operating system for memory management.
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Heap_Max
|
||||
</td>
|
||||
<td>The maximum amount of memory (bytes) that can be used for memory management.
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Heap_Used
|
||||
</td>
|
||||
<td>The amount of used memory in bytes.
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Disk_Utilization
|
||||
</td>
|
||||
<td rowspan="3">DiskName
|
||||
</td>
|
||||
<td>Disk utilization rate: percentage of disk time spent reading and writing by the OpenSearch process in the past five seconds.
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Disk_WaitTime
|
||||
</td>
|
||||
<td>Average duration (milliseconds) of read and write operations in the past five seconds.
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Disk_ServiceRate
|
||||
</td>
|
||||
<td>Service rate: MB read or written per second in the past five seconds. This metric assumes that each disk sector stores 512 bytes.
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Net_TCP_NumFlows
|
||||
</td>
|
||||
<td rowspan="6">DestAddr
|
||||
</td>
|
||||
<td>Number of samples collected. Performance Analyzer collects one sample every five seconds.
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Net_TCP_TxQ
|
||||
</td>
|
||||
<td>Average number of TCP packets in the send buffer.
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Net_TCP_RxQ
|
||||
</td>
|
||||
<td>Average number of TCP packets in the receive buffer.
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Net_TCP_Lost
|
||||
</td>
|
||||
<td>Average number of unrecovered recurring timeouts. This number is reset when the recovery finishes or `SND.UNA` is advanced. `SND.UNA` is the sequence number of the first byte of data that has been sent, but not yet acknowledged.
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Net_TCP_SendCWND
|
||||
</td>
|
||||
<td>Average size (bytes) of the sending congestion window.
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Net_TCP_SSThresh
|
||||
</td>
|
||||
<td>Average size (bytes) of the slow start size threshold.
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Net_PacketRate4
|
||||
</td>
|
||||
<td rowspan="5">Direction
|
||||
</td>
|
||||
<td>The total number of IPv4 datagrams transmitted or received from or by interfaces per second, including those transmitted or received in error.
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Net_PacketDropRate4
|
||||
</td>
|
||||
<td>The total number of IPv4 datagrams transmitted or received in error per second.
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Net_PacketRate6
|
||||
</td>
|
||||
<td>The total number of IPv6 datagrams transmitted or received from or by interfaces per second, including those transmitted or received in error.
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Net_PacketDropRate6
|
||||
</td>
|
||||
<td>The total number of IPv6 datagrams transmitted or received in error per second.
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Net_Throughput
|
||||
</td>
|
||||
<td>The number of bits transmitted or received per second by all network interfaces.
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>ThreadPool_QueueSize
|
||||
</td>
|
||||
<td rowspan="4">ThreadPoolType
|
||||
</td>
|
||||
<td>The size of the task queue.
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>ThreadPool_RejectedReqs
|
||||
</td>
|
||||
<td>The number of rejected executions.
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>ThreadPool_TotalThreads
|
||||
</td>
|
||||
<td>The current number of threads in the pool.
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>ThreadPool_ActiveThreads
|
||||
</td>
|
||||
<td>The approximate number of threads that are actively executing tasks.
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Master_PendingQueueSize
|
||||
</td>
|
||||
<td>N/A
|
||||
</td>
|
||||
<td>The current number of pending tasks in the cluster state update thread. Each node has a cluster state update thread that submits cluster state update tasks (create index, update mapping, allocate shard, fail shard, etc.).
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>HTTP_RequestDocs
|
||||
</td>
|
||||
<td rowspan="2">Operation, Exception, Indices, HTTPRespCode
|
||||
</td>
|
||||
<td>The number of items in the request (only for `_bulk` request type).
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>HTTP_TotalRequests
|
||||
</td>
|
||||
<td>The number of finished requests in the past five seconds.
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>CB_EstimatedSize
|
||||
</td>
|
||||
<td rowspan="3">CBType
|
||||
</td>
|
||||
<td>The current number of estimated bytes.
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>CB_TrippedEvents
|
||||
</td>
|
||||
<td>The number of times the circuit breaker has tripped.
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>CB_ConfiguredSize
|
||||
</td>
|
||||
<td>The limit (bytes) for how much memory operations can use.
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Master_Task_Queue_Time
|
||||
</td>
|
||||
<td rowspan="2">MasterTaskInsertOrder, MasterTaskPriority, MasterTaskType, MasterTaskMetadata
|
||||
</td>
|
||||
<td>The time (milliseconds) that a master task spent in the queue.
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Master_Task_Run_Time
|
||||
</td>
|
||||
<td>The time (milliseconds) that a master task has been executed.
|
||||
</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
|
||||
|
||||
## Dimensions reference
|
||||
|
||||
Dimension | Return values
|
||||
:--- | :---
|
||||
ShardID | ID for the shard (e.g. `1`).
|
||||
IndexName | Name of the index (e.g. `my-index`).
|
||||
Operation | Type of operation (e.g. `shardbulk`).
|
||||
ShardRole | `primary`, `replica`
|
||||
Exception | OpenSearch exceptions (e.g. `org.opensearch.index_not_found_exception`).
|
||||
Indices | The list of indices in the request URI.
|
||||
HTTPRespCode | Response code from OpenSearch (e.g. `200`).
|
||||
MemType | `totYoungGC`, `totFullGC`, `Survivor`, `PermGen`, `OldGen`, `Eden`, `NonHeap`, `Heap`
|
||||
DiskName | Name of the disk (e.g. `sda1`).
|
||||
DestAddr | Destination address (e.g. `010015AC`).
|
||||
Direction | `in`, `out`
|
||||
ThreadPoolType | The OpenSearch thread pools (e.g. `index`, `search`, `snapshot`).
|
||||
CBType | `accounting`, `fielddata`, `in_flight_requests`, `parent`, `request`
|
||||
MasterTaskInsertOrder | The order in which the task was inserted (e.g. `3691`).
|
||||
MasterTaskPriority | Priority of the task (e.g. `URGENT`). OpenSearch executes higher priority tasks before lower priority ones, regardless of `insert_order`.
|
||||
MasterTaskType | `shard-started`, `create-index`, `delete-index`, `refresh-mapping`, `put-mapping`, `CleanupSnapshotRestoreState`, `Update snapshot state`
|
||||
MasterTaskMetadata | Metadata for the task (if any).
|
|
@ -0,0 +1,672 @@
|
|||
---
|
||||
layout: default
|
||||
title: Commands
|
||||
parent: Piped processing language
|
||||
nav_order: 4
|
||||
---
|
||||
|
||||
|
||||
# Commands
|
||||
|
||||
Start a PPL query with a `search` command to reference a table to search from. You can have the commands that follow in any order.
|
||||
|
||||
In the following example, the `search` command refers to an `accounts` index as the source, then uses `fields` and `where` commands for the conditions:
|
||||
|
||||
```sql
|
||||
search source=accounts
|
||||
| where age > 18
|
||||
| fields firstname, lastname
|
||||
```
|
||||
|
||||
In the below examples, we represent required arguments in angle brackets `< >` and optional arguments in square brackets `[ ]`.
|
||||
{: .note }
|
||||
|
||||
## search
|
||||
|
||||
Use the `search` command to retrieve a document from an index. You can only use the `search` command as the first command in the PPL query.
|
||||
|
||||
### Syntax
|
||||
|
||||
```sql
|
||||
search source=<index> [boolean-expression]
|
||||
```
|
||||
|
||||
Field | Description | Required
|
||||
:--- | :--- |:---
|
||||
`search` | Specify search keywords. | Yes
|
||||
`index` | Specify which index to query from. | Yes
|
||||
`boolean-expression` | Specify an expression that evaluates to a boolean value. | No
|
||||
|
||||
*Example 1*: Get all documents
|
||||
|
||||
To get all documents from the `accounts` index:
|
||||
|
||||
```sql
|
||||
search source=accounts;
|
||||
```
|
||||
|
||||
| account_number | firstname | address | balance | gender | city | employer | state | age | email | lastname |
|
||||
:--- | :--- |
|
||||
| 1 | Amber | 880 Holmes Lane | 39225 | M | Brogan | Pyrami | IL | 32 | amberduke@pyrami.com | Duke
|
||||
| 6 | Hattie | 671 Bristol Street | 5686 | M | Dante | Netagy | TN | 36 | hattiebond@netagy.com | Bond
|
||||
| 13 | Nanette | 789 Madison Street | 32838 | F | Nogal | Quility | VA | 28 | null | Bates
|
||||
| 18 | Dale | 467 Hutchinson Court | 4180 | M | Orick | null | MD | 33 | daleadams@boink.com | Adams
|
||||
|
||||
*Example 2*: Get documents that match a condition
|
||||
|
||||
To get all documents from the `accounts` index that have either `account_number` equal to 1 or have `gender` as `F`:
|
||||
|
||||
```sql
|
||||
search source=accounts account_number=1 or gender="F";
|
||||
```
|
||||
|
||||
| account_number | firstname | address | balance | gender | city | employer | state | age | email | lastname |
|
||||
:--- | :--- |
|
||||
| 1 | Amber | 880 Holmes Lane | 39225 | M | Brogan | Pyrami | IL | 32 | amberduke@pyrami.com | Duke |
|
||||
| 13 | Nanette | 789 Madison Street | 32838 | F | Nogal | Quility | VA | 28 | null | Bates |
|
||||
|
||||
## dedup
|
||||
|
||||
The `dedup` (data deduplication) command removes duplicate documents defined by a field from the search result.
|
||||
|
||||
### Syntax
|
||||
|
||||
```sql
|
||||
dedup [int] <field-list> [keepempty=<bool>] [consecutive=<bool>]
|
||||
```
|
||||
|
||||
Field | Description | Type | Required | Default
|
||||
:--- | :--- |:--- |:--- |:---
|
||||
`int` | Retain the specified number of duplicate events for each combination. The number must be greater than 0. If you do not specify a number, only the first occurring event is kept and all other duplicates are removed from the results. | `integer` | No | 1
|
||||
`keepempty` | If true, keep the document if any field in the field list has a null value or a field missing. | `boolean` | No | False
|
||||
`consecutive` | If true, remove only consecutive events with duplicate combinations of values. | `boolean` | No | False
|
||||
`field-list` | Specify a comma-delimited field list. At least one field is required. | `string list` | Yes | -
|
||||
|
||||
*Example 1*: Dedup by one field
|
||||
|
||||
To remove duplicate documents with the same gender:
|
||||
|
||||
```sql
|
||||
search source=accounts | dedup gender | fields account_number, gender;
|
||||
```
|
||||
|
||||
| account_number | gender
|
||||
:--- | :--- |
|
||||
1 | M
|
||||
13 | F
|
||||
|
||||
|
||||
*Example 2*: Keep two duplicate documents
|
||||
|
||||
To keep two duplicate documents with the same gender:
|
||||
|
||||
```sql
|
||||
search source=accounts | dedup 2 gender | fields account_number, gender;
|
||||
```
|
||||
|
||||
| account_number | gender
|
||||
:--- | :--- |
|
||||
1 | M
|
||||
6 | M
|
||||
13 | F
|
||||
|
||||
*Example 3*: Keep or ignore an empty field by default
|
||||
|
||||
To keep two duplicate documents with a `null` field value:
|
||||
|
||||
```sql
|
||||
search source=accounts | dedup email keepempty=true | fields account_number, email;
|
||||
```
|
||||
|
||||
| account_number | email
|
||||
:--- | :--- |
|
||||
1 | amberduke@pyrami.com
|
||||
6 | hattiebond@netagy.com
|
||||
13 | null
|
||||
18 | daleadams@boink.com
|
||||
|
||||
To remove duplicate documents with the `null` field value:
|
||||
|
||||
```sql
|
||||
search source=accounts | dedup email | fields account_number, email;
|
||||
```
|
||||
|
||||
| account_number | email
|
||||
:--- | :--- |
|
||||
1 | amberduke@pyrami.com
|
||||
6 | hattiebond@netagy.com
|
||||
18 | daleadams@boink.com
|
||||
|
||||
*Example 4*: Dedup of consecutive documents
|
||||
|
||||
To remove duplicates of consecutive documents:
|
||||
|
||||
```sql
|
||||
search source=accounts | dedup gender consecutive=true | fields account_number, gender;
|
||||
```
|
||||
|
||||
| account_number | gender
|
||||
:--- | :--- |
|
||||
1 | M
|
||||
13 | F
|
||||
18 | M
|
||||
|
||||
## eval
|
||||
|
||||
The `eval` command evaluates an expression and appends its result to the search result.
|
||||
|
||||
### Syntax
|
||||
|
||||
```sql
|
||||
eval <field>=<expression> ["," <field>=<expression> ]...
|
||||
```
|
||||
|
||||
Field | Description | Required
|
||||
:--- | :--- |:---
|
||||
`field` | If a field name does not exist, a new field is added. If the field name already exists, it's overwritten. | Yes
|
||||
`expression` | Specify any supported expression. | Yes
|
||||
|
||||
*Example 1*: Create a new field
|
||||
|
||||
To create a new `doubleAge` field for each document, where `doubleAge` is the result of `age` multiplied by 2:
|
||||
|
||||
```sql
|
||||
search source=accounts | eval doubleAge = age * 2 | fields age, doubleAge;
|
||||
```
|
||||
|
||||
| age | doubleAge
|
||||
:--- | :--- |
|
||||
32 | 64
|
||||
36 | 72
|
||||
28 | 56
|
||||
33 | 66
|
||||
|
||||
*Example 2*: Overwrite the existing field
|
||||
|
||||
To overwrite the `age` field with `age` plus 1:
|
||||
|
||||
```sql
|
||||
search source=accounts | eval age = age + 1 | fields age;
|
||||
```
|
||||
|
||||
| age
|
||||
:--- |
|
||||
| 33
|
||||
| 37
|
||||
| 29
|
||||
| 34
|
||||
|
||||
*Example 3*: Create a new field with a field defined with the `eval` command
|
||||
|
||||
To create a new field `ddAge`, where `ddAge` is the result of `doubleAge` multiplied by 2 and `doubleAge` is defined in the same `eval` command:
|
||||
|
||||
```sql
|
||||
search source=accounts | eval doubleAge = age * 2, ddAge = doubleAge * 2 | fields age, doubleAge, ddAge;
|
||||
```
|
||||
|
||||
| age | doubleAge | ddAge
|
||||
:--- | :--- |
|
||||
| 32 | 64 | 128
|
||||
| 36 | 72 | 144
|
||||
| 28 | 56 | 112
|
||||
| 33 | 66 | 132
|
||||
|
||||
## fields
|
||||
|
||||
Use the `fields` command to keep or remove fields from a search result.
|
||||
|
||||
### Syntax
|
||||
|
||||
```sql
|
||||
fields [+|-] <field-list>
|
||||
```
|
||||
|
||||
Field | Description | Required | Default
|
||||
:--- | :--- |:---|:---
|
||||
`index` | Plus (+) keeps only fields specified in the field list. Minus (-) removes all fields specified in the field list. | No | +
|
||||
`field-list` | Specify a comma-delimited list of fields. | Yes | No default
|
||||
|
||||
*Example 1*: Select specified fields from result
|
||||
|
||||
To get `account_number`, `firstname`, and `lastname` fields from a search result:
|
||||
|
||||
```sql
|
||||
search source=accounts | fields account_number, firstname, lastname;
|
||||
```
|
||||
|
||||
| account_number | firstname | lastname
|
||||
:--- | :--- |
|
||||
| 1 | Amber | Duke
|
||||
| 6 | Hattie | Bond
|
||||
| 13 | Nanette | Bates
|
||||
| 18 | Dale | Adams
|
||||
|
||||
*Example 2*: Remove specified fields from a search result
|
||||
|
||||
To remove the `account_number` field from the search results:
|
||||
|
||||
```sql
|
||||
search source=accounts | fields account_number, firstname, lastname | fields - account_number;
|
||||
```
|
||||
|
||||
| firstname | lastname
|
||||
:--- | :--- |
|
||||
| Amber | Duke
|
||||
| Hattie | Bond
|
||||
| Nanette | Bates
|
||||
| Dale | Adams
|
||||
|
||||
## rename
|
||||
|
||||
Use the `rename` command to rename one or more fields in the search result.
|
||||
|
||||
### Syntax
|
||||
|
||||
```sql
|
||||
rename <source-field> AS <target-field>["," <source-field> AS <target-field>]...
|
||||
```
|
||||
|
||||
Field | Description | Required
|
||||
:--- | :--- |:---
|
||||
`source-field` | The name of the field that you want to rename. | Yes
|
||||
`target-field` | The name you want to rename to. | Yes
|
||||
|
||||
*Example 1*: Rename one field
|
||||
|
||||
Rename the `account_number` field as `an`:
|
||||
|
||||
```sql
|
||||
search source=accounts | rename account_number as an | fields an;
|
||||
```
|
||||
|
||||
| an
|
||||
:--- |
|
||||
| 1
|
||||
| 6
|
||||
| 13
|
||||
| 18
|
||||
|
||||
*Example 2*: Rename multiple fields
|
||||
|
||||
Rename the `account_number` field as `an` and `employer` as `emp`:
|
||||
|
||||
```sql
|
||||
search source=accounts | rename account_number as an, employer as emp | fields an, emp;
|
||||
```
|
||||
|
||||
| an | emp
|
||||
:--- | :--- |
|
||||
| 1 | Pyrami
|
||||
| 6 | Netagy
|
||||
| 13 | Quility
|
||||
| 18 | null
|
||||
|
||||
## sort
|
||||
|
||||
Use the `sort` command to sort search results by a specified field.
|
||||
|
||||
### Syntax
|
||||
|
||||
```sql
|
||||
sort [count] <[+|-] sort-field>...
|
||||
```
|
||||
|
||||
Field | Description | Required | Default
|
||||
:--- | :--- |:---|:---
|
||||
`count` | The maximum number of results to return from the sorted result. If count=0, all results are returned. | No | 1000
|
||||
`[+|-]` | Use plus [+] to sort by ascending order and minus [-] to sort by descending order. | No | Ascending order
|
||||
`sort-field` | Specify the field that you want to sort by. | Yes | -
|
||||
|
||||
*Example 1*: Sort by one field
|
||||
|
||||
To sort all documents by the `age` field in ascending order:
|
||||
|
||||
```sql
|
||||
search source=accounts | sort age | fields account_number, age;
|
||||
```
|
||||
|
||||
| account_number | age |
|
||||
:--- | :--- |
|
||||
| 13 | 28
|
||||
| 1 | 32
|
||||
| 18 | 33
|
||||
| 6 | 36
|
||||
|
||||
*Example 2*: Sort by one field and return all results
|
||||
|
||||
To sort all documents by the `age` field in ascending order and specify count as 0 to get back all results:
|
||||
|
||||
```sql
|
||||
search source=accounts | sort 0 age | fields account_number, age;
|
||||
```
|
||||
|
||||
| account_number | age |
|
||||
:--- | :--- |
|
||||
| 13 | 28
|
||||
| 1 | 32
|
||||
| 18 | 33
|
||||
| 6 | 36
|
||||
|
||||
*Example 3*: Sort by one field in descending order
|
||||
|
||||
To sort all documents by the `age` field in descending order:
|
||||
|
||||
```sql
|
||||
search source=accounts | sort - age | fields account_number, age;
|
||||
```
|
||||
|
||||
| account_number | age |
|
||||
:--- | :--- |
|
||||
| 6 | 36
|
||||
| 18 | 33
|
||||
| 1 | 32
|
||||
| 13 | 28
|
||||
|
||||
*Example 4*: Specify the number of sorted documents to return
|
||||
|
||||
To sort all documents by the `age` field in ascending order and specify count as 2 to get back two results:
|
||||
|
||||
```sql
|
||||
search source=accounts | sort 2 age | fields account_number, age;
|
||||
```
|
||||
|
||||
| account_number | age |
|
||||
:--- | :--- |
|
||||
| 13 | 28
|
||||
| 1 | 32
|
||||
|
||||
*Example 5*: Sort by multiple fields
|
||||
|
||||
To sort all documents by the `gender` field in ascending order and `age` field in descending order:
|
||||
|
||||
```sql
|
||||
search source=accounts | sort + gender, - age | fields account_number, gender, age;
|
||||
```
|
||||
|
||||
| account_number | gender | age |
|
||||
:--- | :--- | :--- |
|
||||
| 13 | F | 28
|
||||
| 6 | M | 36
|
||||
| 18 | M | 33
|
||||
| 1 | M | 32
|
||||
|
||||
## stats
|
||||
|
||||
Use the `stats` command to aggregate from search results.
|
||||
|
||||
The following table lists the aggregation functions and also indicates how each one handles null or missing values:
|
||||
|
||||
Function | NULL | MISSING
|
||||
:--- | :--- |:---
|
||||
`COUNT` | Not counted | Not counted
|
||||
`SUM` | Ignore | Ignore
|
||||
`AVG` | Ignore | Ignore
|
||||
`MAX` | Ignore | Ignore
|
||||
`MIN` | Ignore | Ignore
|
||||
|
||||
|
||||
### Syntax
|
||||
|
||||
```
|
||||
stats <aggregation>... [by-clause]...
|
||||
```
|
||||
|
||||
Field | Description | Required | Default
|
||||
:--- | :--- |:---|:---
|
||||
`aggregation` | Specify a statistical aggregation function. The argument of this function must be a field. | Yes | -
|
||||
`by-clause` | Specify one or more fields to group the results by. If not specified, the `stats` command returns only one row, which is the aggregation over the entire result set. | No | -
|
||||
|
||||
*Example 1*: Calculate the average value of a field
|
||||
|
||||
To calculate the average `age` of all documents:
|
||||
|
||||
```sql
|
||||
search source=accounts | stats avg(age);
|
||||
```
|
||||
|
||||
| avg(age)
|
||||
:--- |
|
||||
| 32.25
|
||||
|
||||
*Example 2*: Calculate the average value of a field by group
|
||||
|
||||
To calculate the average age grouped by gender:
|
||||
|
||||
```sql
|
||||
search source=accounts | stats avg(age) by gender;
|
||||
```
|
||||
|
||||
| gender | avg(age)
|
||||
:--- | :--- |
|
||||
| F | 28.0
|
||||
| M | 33.666666666666664
|
||||
|
||||
*Example 3*: Calculate the average and sum of a field by group
|
||||
|
||||
To calculate the average and sum of age grouped by gender:
|
||||
|
||||
```sql
|
||||
search source=accounts | stats avg(age), sum(age) by gender;
|
||||
```
|
||||
|
||||
| gender | avg(age) | sum(age)
|
||||
:--- | :--- | :--- |
|
||||
| F | 28 | 28
|
||||
| M | 33.666666666666664 | 101
|
||||
|
||||
*Example 4*: Calculate the maximum value of a field
|
||||
|
||||
To calculate the maximum age:
|
||||
|
||||
```sql
|
||||
search source=accounts | stats max(age);
|
||||
```
|
||||
|
||||
| max(age)
|
||||
:--- |
|
||||
| 36
|
||||
|
||||
*Example 5*: Calculate the maximum and minimum value of a field by group
|
||||
|
||||
To calculate the maximum and minimum age values grouped by gender:
|
||||
|
||||
```sql
|
||||
search source=accounts | stats max(age), min(age) by gender;
|
||||
```
|
||||
|
||||
| gender | min(age) | max(age)
|
||||
:--- | :--- | :--- |
|
||||
| F | 28 | 28
|
||||
| M | 32 | 36
|
||||
|
||||
## where
|
||||
|
||||
Use the `where` command with a bool expression to filter the search result. The `where` command only returns the result when the bool expression evaluates to true.
|
||||
|
||||
### Syntax
|
||||
|
||||
```sql
|
||||
where <boolean-expression>
|
||||
```
|
||||
|
||||
Field | Description | Required
|
||||
:--- | :--- |:---
|
||||
`boolean-expression` | An expression that evaluates to a boolean value. | No
|
||||
|
||||
*Example 1*: Filter result set with a condition
|
||||
|
||||
To get all documents from the `accounts` index where `account_number` is 1 or gender is `F`:
|
||||
|
||||
```sql
|
||||
search source=accounts | where account_number=1 or gender="F" | fields account_number, gender;
|
||||
```
|
||||
|
||||
| account_number | gender
|
||||
:--- | :--- |
|
||||
| 1 | M
|
||||
| 13 | F
|
||||
|
||||
## head
|
||||
|
||||
Use the `head` command to return the first N results in a specified search order.
|
||||
|
||||
### Syntax
|
||||
|
||||
```sql
|
||||
head [keeplast = (true | false)] [while "("<boolean-expression>")"] [N]
|
||||
```
|
||||
|
||||
Field | Description | Required | Default
|
||||
:--- | :--- |:---|:---
|
||||
`keeplast` | Use along with the `while` argument to check if the last result in the result set is retained. The last result is what caused the `while` condition to evaluate to false or NULL. Set `keeplast` to true to retain the last result and false to discard it. | No | True
|
||||
`while` | An expression that evaluates to either true or false. You cannot use statistical functions in this expression. | No | False
|
||||
`N` | Specify the number of results to return. | No | 10
|
||||
|
||||
*Example 1*: Get the first 10 results
|
||||
|
||||
To get the first 10 results:
|
||||
|
||||
```sql
|
||||
search source=accounts | fields firstname, age | head;
|
||||
```
|
||||
|
||||
| firstname | age
|
||||
:--- | :--- |
|
||||
| Amber | 32
|
||||
| Hattie | 36
|
||||
| Nanette | 28
|
||||
|
||||
*Example 2*: Get the first N results
|
||||
|
||||
To get the first two results:
|
||||
|
||||
```sql
|
||||
search source=accounts | fields firstname, age | head 2;
|
||||
```
|
||||
|
||||
| firstname | age
|
||||
:--- | :--- |
|
||||
| Amber | 32
|
||||
| Hattie | 36
|
||||
|
||||
*Example 3*: Get the first N results that match a while condition
|
||||
|
||||
To get the first 3 results from all accounts with age less than 30:
|
||||
|
||||
```sql
|
||||
search source=accounts | fields firstname, age | sort age | head while(age < 30) 3;
|
||||
```
|
||||
|
||||
| firstname | age
|
||||
:--- | :--- |
|
||||
| Nanette | 28
|
||||
| Amber | 32
|
||||
|
||||
*Example 4*: Get the first N results with a while condition, discarding the last result that failed the condition
|
||||
|
||||
To get the first 3 results from all accounts with age less than 30 and exclude the last result, which failed the condition:
|
||||
|
||||
```sql
|
||||
search source=accounts | fields firstname, age | sort age | head keeplast=false while(age < 30) 3;
|
||||
```
|
||||
|
||||
| firstname | age
|
||||
:--- | :--- |
|
||||
| Nanette | 28
|
||||
|
||||
## rare
|
||||
|
||||
Use the `rare` command to find the least common values of all fields in a field list.
|
||||
A maximum of 10 results are returned for each distinct set of values of the group-by fields.
|
||||
|
||||
### Syntax
|
||||
|
||||
```sql
|
||||
rare <field-list> [by-clause]
|
||||
```
|
||||
|
||||
Field | Description | Required
|
||||
:--- | :--- |:---
|
||||
`field-list` | Specify a comma-delimited list of field names. | No
|
||||
`by-clause` | Specify one or more fields to group the results by. | No
|
||||
|
||||
*Example 1*: Find the least common values in a field
|
||||
|
||||
To find the least common values of gender:
|
||||
|
||||
```sql
|
||||
search source=accounts | rare gender;
|
||||
```
|
||||
|
||||
| gender
|
||||
:--- |
|
||||
| F
|
||||
| M
|
||||
|
||||
*Example 2*: Find the least common values grouped by gender
|
||||
|
||||
To find the least common age grouped by gender:
|
||||
|
||||
```sql
|
||||
search source=accounts | rare age by gender;
|
||||
```
|
||||
|
||||
| gender | age
|
||||
:--- | :--- |
|
||||
| F | 28
|
||||
| M | 32
|
||||
| M | 33
|
||||
|
||||
## top {#top-command}
|
||||
|
||||
Use the `top` command to find the most common values of all fields in the field list.
|
||||
|
||||
### Syntax
|
||||
|
||||
```sql
|
||||
top [N] <field-list> [by-clause]
|
||||
```
|
||||
|
||||
Field | Description | Default
|
||||
:--- | :--- |:---
|
||||
`N` | Specify the number of results to return. | 10
|
||||
`field-list` | Specify a comma-delimited list of field names. | -
|
||||
`by-clause` | Specify one or more fields to group the results by. | -
|
||||
|
||||
*Example 1*: Find the most common values in a field
|
||||
|
||||
To find the most common genders:
|
||||
|
||||
```sql
|
||||
search source=accounts | top gender;
|
||||
```
|
||||
|
||||
| gender
|
||||
:--- |
|
||||
| M
|
||||
| F
|
||||
|
||||
*Example 2*: Find the most common value in a field
|
||||
|
||||
To find the most common gender:
|
||||
|
||||
```sql
|
||||
search source=accounts | top 1 gender;
|
||||
```
|
||||
|
||||
| gender
|
||||
:--- |
|
||||
| M
|
||||
|
||||
*Example 3*: Find the most common values grouped by gender
|
||||
|
||||
To find the most common age grouped by gender:
|
||||
|
||||
```sql
|
||||
search source=accounts | top 1 age by gender;
|
||||
```
|
||||
|
||||
| gender | age
|
||||
:--- | :--- |
|
||||
| F | 28
|
||||
| M | 32
|
|
@ -0,0 +1,36 @@
|
|||
---
|
||||
layout: default
|
||||
title: Data Types
|
||||
parent: Piped processing language
|
||||
nav_order: 6
|
||||
---
|
||||
|
||||
|
||||
# Data types
|
||||
|
||||
The following table shows the data types supported by the PPL plugin and how each one maps to OpenSearch and SQL data types:
|
||||
|
||||
PPL Type | OpenSearch Type | SQL Type
|
||||
:--- | :--- | :---
|
||||
boolean | boolean | BOOLEAN
|
||||
byte | byte | TINYINT
|
||||
byte | short | SMALLINT
|
||||
integer | integer | INTEGER
|
||||
long | long | BIGINT
|
||||
float | float | REAL
|
||||
float | half_float | FLOAT
|
||||
float | scaled_float | DOUBLE
|
||||
double | double | DOUBLE
|
||||
string | keyword | VARCHAR
|
||||
text | text | VARCHAR
|
||||
timestamp | date | TIMESTAMP
|
||||
ip | ip | VARCHAR
|
||||
timestamp | date | TIMESTAMP
|
||||
binary | binary | VARBINARY
|
||||
struct | object | STRUCT
|
||||
array | nested | STRUCT
|
||||
|
||||
In addition to this list, the PPL plugin also supports the `datetime` type, though it doesn't have a corresponding mapping with OpenSearch.
|
||||
To use a function without a corresponding mapping, you must explicitly convert the data type to one that does.
|
||||
|
||||
The PPL plugin supports all SQL date and time types. To learn more, see [SQL Data Types](../../sql/datatypes/).
|
|
@ -0,0 +1,22 @@
|
|||
---
|
||||
layout: default
|
||||
title: Endpoint
|
||||
parent: Piped processing language
|
||||
nav_order: 1
|
||||
---
|
||||
|
||||
# Endpoint
|
||||
|
||||
To send a query request to the PPL plugin, use an HTTP POST request.
|
||||
We recommend a POST request because it doesn't have any length limit and it allows you to pass other parameters to the plugin for other functionality.
|
||||
|
||||
Use the explain endpoint for query translation and troubleshooting.
|
||||
|
||||
## Request Format
|
||||
|
||||
To use the PPL plugin with your own applications, send requests to `_opensearch/_ppl`, with your query in the request body:
|
||||
|
||||
```json
|
||||
curl -H 'Content-Type: application/json' -X POST localhost:9200/_opensearch/_ppl \
|
||||
... -d '{"query" : "source=accounts | fields firstname, lastname"}'
|
||||
```
|
|
@ -0,0 +1,10 @@
|
|||
---
|
||||
layout: default
|
||||
title: Functions
|
||||
parent: Piped processing language
|
||||
nav_order: 10
|
||||
---
|
||||
|
||||
# Functions
|
||||
|
||||
The PPL plugin supports all SQL functions. To learn more, see [SQL Functions](../../sql/functions/).
|
|
@ -0,0 +1,72 @@
|
|||
---
|
||||
layout: default
|
||||
title: Identifiers
|
||||
parent: Piped processing language
|
||||
nav_order: 7
|
||||
---
|
||||
|
||||
|
||||
# Identifiers
|
||||
|
||||
An identifier is an ID to name your database objects, such as index names, field names, aliases, and so on.
|
||||
OpenSearch supports two types of identifiers: regular identifiers and delimited identifiers.
|
||||
|
||||
## Regular identifiers
|
||||
|
||||
A regular identifier is a string of characters that starts with an ASCII letter (lower or upper case).
|
||||
The next character can either be a letter, digit, or underscore (_). It can't be a reserved keyword.
|
||||
Whitespace and other special characters are also not allowed.
|
||||
|
||||
OpenSearch supports the following regular identifiers:
|
||||
|
||||
1. Identifiers prefixed by a dot `.` sign. Use to hide an index. For example `.opensearch-dashboards`.
|
||||
2. Identifiers prefixed by an `@` sign. Use for meta fields generated by Logstash ingestion.
|
||||
3. Identifiers with hyphen `-` in the middle. Use for index names with date information.
|
||||
4. Identifiers with star `*` present. Use for wildcard match of index patterns.
|
||||
|
||||
For regular identifiers, you can use the name without any back tick or escape characters.
|
||||
In this example, `source`, `fields`, `account_number`, `firstname`, and `lastname` are all identifiers. Out of these, the `source` field is a reserved identifier.
|
||||
|
||||
```sql
|
||||
source=accounts | fields account_number, firstname, lastname;
|
||||
```
|
||||
|
||||
| account_number | firstname | lastname |
|
||||
:--- | :--- | :--- |
|
||||
| 1 | Amber | Duke
|
||||
| 6 | Hattie | Bond
|
||||
| 13 | Nanette | Bates
|
||||
| 18 | Dale | Adams
|
||||
|
||||
|
||||
## Delimited identifiers
|
||||
|
||||
A delimited identifier can contain special characters not allowed by a regular identifier.
|
||||
You must enclose delimited identifiers with back ticks (\`\`). Back ticks differentiate the identifier from special characters.
|
||||
|
||||
If the index name includes a dot (`.`), for example, `log-2021.01.11`, use delimited identifiers with back ticks to escape it \``log-2021.01.11`\`.
|
||||
|
||||
Typical examples of using delimited identifiers:
|
||||
|
||||
1. Identifiers with reserved keywords.
|
||||
2. Identifiers with a `.` present. Similarly, `-` to include date information.
|
||||
3. Identifiers with other special characters. For example, Unicode characters.
|
||||
|
||||
To quote an index name with back ticks:
|
||||
|
||||
```sql
|
||||
source=`accounts` | fields `account_number`;
|
||||
```
|
||||
|
||||
| account_number |
|
||||
:--- |
|
||||
| 1 |
|
||||
| 6 |
|
||||
| 13 |
|
||||
| 18 |
|
||||
|
||||
## Case sensitivity
|
||||
|
||||
Identifiers are case sensitive. They must be exactly the same as what's stored in OpenSearch.
|
||||
|
||||
For example, if you run `source=Accounts`, you'll get an index not found exception because the actual index name is in lower case.
|
|
@ -0,0 +1,58 @@
|
|||
---
|
||||
layout: default
|
||||
title: Piped processing language
|
||||
nav_order: 42
|
||||
has_children: true
|
||||
has_toc: false
|
||||
---
|
||||
|
||||
# Piped Processing Language
|
||||
|
||||
Piped Processing Language (PPL) is a query language that lets you use pipe (`|`) syntax to explore, discover, and query data stored in OpenSearch.
|
||||
|
||||
To quickly get up and running with PPL, use **Query Workbench** in OpenSearch Dashboards. To learn more, see [Workbench](../sql/workbench/).
|
||||
|
||||
The PPL syntax consists of commands delimited by the pipe character (`|`) where data flows from left to right through each pipeline.
|
||||
|
||||
```sql
|
||||
search command | command 1 | command 2 ...
|
||||
```
|
||||
|
||||
You can only use read-only commands like `search`, `where`, `fields`, `rename`, `dedup`, `stats`, `sort`, `eval`, `head`, `top`, and `rare`.
|
||||
|
||||
## Quick start
|
||||
|
||||
To get started with PPL, choose **Dev Tools** in OpenSearch Dashboards and use the `bulk` operation to index some sample data:
|
||||
|
||||
```json
|
||||
PUT accounts/_bulk?refresh
|
||||
{"index":{"_id":"1"}}
|
||||
{"account_number":1,"balance":39225,"firstname":"Amber","lastname":"Duke","age":32,"gender":"M","address":"880 Holmes Lane","employer":"Pyrami","email":"amberduke@pyrami.com","city":"Brogan","state":"IL"}
|
||||
{"index":{"_id":"6"}}
|
||||
{"account_number":6,"balance":5686,"firstname":"Hattie","lastname":"Bond","age":36,"gender":"M","address":"671 Bristol Street","employer":"Netagy","email":"hattiebond@netagy.com","city":"Dante","state":"TN"}
|
||||
{"index":{"_id":"13"}}
|
||||
{"account_number":13,"balance":32838,"firstname":"Nanette","lastname":"Bates","age":28,"gender":"F","address":"789 Madison Street","employer":"Quility","city":"Nogal","state":"VA"}
|
||||
{"index":{"_id":"18"}}
|
||||
{"account_number":18,"balance":4180,"firstname":"Dale","lastname":"Adams","age":33,"gender":"M","address":"467 Hutchinson Court","email":"daleadams@boink.com","city":"Orick","state":"MD"}
|
||||
```
|
||||
|
||||
Go to **Query Workbench** and select **PPL**.
|
||||
|
||||
The following example returns `firstname` and `lastname` fields for documents in an `accounts` index with `age` greater than 18:
|
||||
|
||||
```json
|
||||
search source=accounts
|
||||
| where age > 18
|
||||
| fields firstname, lastname
|
||||
```
|
||||
|
||||
#### Sample Response
|
||||
|
||||
| id | firstname | lastname |
|
||||
:--- | :--- | :--- |
|
||||
| 0 | Amber | Duke
|
||||
| 1 | Hattie | Bond
|
||||
| 2 | Nanette | Bates
|
||||
| 3 | Dale | Adams
|
||||
|
||||
![PPL query workbench](../images/ppl.png)
|
|
@ -0,0 +1,71 @@
|
|||
---
|
||||
layout: default
|
||||
title: Protocol
|
||||
parent: Piped processing language
|
||||
nav_order: 2
|
||||
---
|
||||
|
||||
# Protocol
|
||||
|
||||
The PPL plugin provides responses in JDBC format. The JDBC format is widely used because it provides schema information and additional functionality such as pagination. Besides the JDBC driver, various clients can benefit from the detailed and well-formatted response.
|
||||
|
||||
## Response Format
|
||||
|
||||
The body of the HTTP POST request can take a few additional fields along with the PPL query:
|
||||
|
||||
```json
|
||||
curl -H 'Content-Type: application/json' -X POST localhost:9200/_opensearch/_ppl \
|
||||
... -d '{"query" : "source=accounts | fields firstname, lastname"}'
|
||||
```
|
||||
|
||||
The following example shows a normal response where the schema includes a field name and its type and datarows includes the result set:
|
||||
|
||||
```json
|
||||
{
|
||||
"schema": [
|
||||
{
|
||||
"name": "firstname",
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"name": "lastname",
|
||||
"type": "string"
|
||||
}
|
||||
],
|
||||
"datarows": [
|
||||
[
|
||||
"Amber",
|
||||
"Duke"
|
||||
],
|
||||
[
|
||||
"Hattie",
|
||||
"Bond"
|
||||
],
|
||||
[
|
||||
"Nanette",
|
||||
"Bates"
|
||||
],
|
||||
[
|
||||
"Dale",
|
||||
"Adams"
|
||||
]
|
||||
],
|
||||
"total": 4,
|
||||
"size": 4
|
||||
}
|
||||
```
|
||||
|
||||
If an error occurs, the error message and its cause are returned instead:
|
||||
|
||||
```json
|
||||
curl -H 'Content-Type: application/json' -X POST localhost:9200/_opensearch/_ppl \
|
||||
... -d '{"query" : "source=unknown | fields firstname, lastname"}'
|
||||
{
|
||||
"error": {
|
||||
"reason": "Error occurred in OpenSearch engine: no such index [unknown]",
|
||||
"details": "org.opensearch.index.IndexNotFoundException: no such index [unknown]\nFor more details, please send request for Json format to see the raw response from opensearch engine.",
|
||||
"type": "IndexNotFoundException"
|
||||
},
|
||||
"status": 404
|
||||
}
|
||||
```
|
|
@ -0,0 +1,35 @@
|
|||
---
|
||||
layout: default
|
||||
title: Settings
|
||||
parent: Piped processing language
|
||||
nav_order: 3
|
||||
---
|
||||
|
||||
# Settings
|
||||
|
||||
The PPL plugin adds a few settings to the standard OpenSearch cluster settings. Most are dynamic, so you can change the default behavior of the plugin without restarting your cluster.
|
||||
|
||||
You can update these settings like any other cluster setting:
|
||||
|
||||
```json
|
||||
PUT _cluster/settings
|
||||
{
|
||||
"transient": {
|
||||
"opensearch": {
|
||||
"ppl": {
|
||||
"enabled": "false"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Requests to `_opensearch/_ppl` include index names in the request body, so they have the same access policy considerations as the `bulk`, `mget`, and `msearch` operations. If you set the `rest.action.multi.allow_explicit_index` parameter to `false`, the PPL plugin is disabled.
|
||||
|
||||
You can specify the settings shown in the following table:
|
||||
|
||||
Setting | Description | Default
|
||||
:--- | :--- | :---
|
||||
`opensearch.ppl.enabled` | Change to `false` to disable the plugin. | True
|
||||
`opensearch.ppl.query.memory_limit` | Set heap memory usage limit. If a query crosses this limit, it's terminated. | 85%
|
||||
`opensearch.query.size_limit` | Set the maximum number of results that you want to see. This impacts the accuracy of aggregation operations. For example, if you have 1000 documents in an index, by default, only 200 documents are extracted from the index for aggregation. | 200
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,242 @@
|
|||
---
|
||||
layout: default
|
||||
title: Cross-Cluster Search
|
||||
parent: Access Control
|
||||
grand_parent: Security
|
||||
nav_order: 40
|
||||
---
|
||||
|
||||
# Cross-cluster search
|
||||
|
||||
Cross-cluster search is exactly what it sounds like: it lets any node in a cluster execute search requests against other clusters. The security plugin supports cross-cluster search out of the box.
|
||||
|
||||
---
|
||||
|
||||
#### Table of contents
|
||||
1. TOC
|
||||
{:toc}
|
||||
|
||||
|
||||
---
|
||||
|
||||
## Authentication flow
|
||||
|
||||
When accessing a *remote cluster* from a *coordinating cluster* using cross-cluster search:
|
||||
|
||||
1. The security plugin authenticates the user on the coordinating cluster.
|
||||
1. The security plugin fetches the user's backend roles on the coordinating cluster.
|
||||
1. The call, including the authenticated user, is forwarded to the remote cluster.
|
||||
1. The user's permissions are evaluated on the remote cluster.
|
||||
|
||||
You can have different authentication and authorization configurations on the remote and coordinating cluster, but we recommend using the same settings on both.
|
||||
|
||||
|
||||
## Permissions
|
||||
|
||||
To query indices on remote clusters, users need to have the following permissions for the index, in addition to `READ` or `SEARCH` permissions:
|
||||
|
||||
```
|
||||
indices:admin/shards/search_shards
|
||||
```
|
||||
|
||||
|
||||
#### Sample roles.yml configuration
|
||||
|
||||
```yml
|
||||
humanresources:
|
||||
cluster:
|
||||
- CLUSTER_COMPOSITE_OPS_RO
|
||||
indices:
|
||||
'humanresources':
|
||||
'*':
|
||||
- READ
|
||||
- indices:admin/shards/search_shards # needed for CCS
|
||||
```
|
||||
|
||||
|
||||
#### Sample role in OpenSearch Dashboards
|
||||
|
||||
![OpenSearch Dashboards UI for creating a cross-cluster search role](../../../images/security-ccs.png)
|
||||
|
||||
|
||||
## Walkthrough
|
||||
|
||||
Save this file as `docker-compose.yml` and run `docker-compose up` to start two single-node clusters on the same network:
|
||||
|
||||
```yml
|
||||
version: '3'
|
||||
services:
|
||||
opensearch-node1:
|
||||
image: opensearchproject/opensearch:{{site.opensearch_version}}
|
||||
container_name: opensearch-node1
|
||||
environment:
|
||||
- cluster.name=opensearch-cluster1
|
||||
- discovery.type=single-node
|
||||
- bootstrap.memory_lock=true # along with the memlock settings below, disables swapping
|
||||
- "OPENSEARCH_JAVA_OPTS=-Xms512m -Xmx512m" # minimum and maximum Java heap size, recommend setting both to 50% of system RAM
|
||||
ulimits:
|
||||
memlock:
|
||||
soft: -1
|
||||
hard: -1
|
||||
volumes:
|
||||
- opensearch-data1:/usr/share/opensearch/data
|
||||
ports:
|
||||
- 9200:9200
|
||||
- 9600:9600 # required for Performance Analyzer
|
||||
networks:
|
||||
- opensearch-net
|
||||
|
||||
opensearch-node2:
|
||||
image: opensearchproject/opensearch:{{site.opensearch_version}}
|
||||
container_name: opensearch-node2
|
||||
environment:
|
||||
- cluster.name=opensearch-cluster2
|
||||
- discovery.type=single-node
|
||||
- bootstrap.memory_lock=true # along with the memlock settings below, disables swapping
|
||||
- "OPENSEARCH_JAVA_OPTS=-Xms512m -Xmx512m" # minimum and maximum Java heap size, recommend setting both to 50% of system RAM
|
||||
ulimits:
|
||||
memlock:
|
||||
soft: -1
|
||||
hard: -1
|
||||
volumes:
|
||||
- opensearch-data2:/usr/share/opensearch/data
|
||||
ports:
|
||||
- 9250:9200
|
||||
- 9700:9600 # required for Performance Analyzer
|
||||
networks:
|
||||
- opensearch-net
|
||||
|
||||
volumes:
|
||||
opensearch-data1:
|
||||
opensearch-data2:
|
||||
|
||||
networks:
|
||||
opensearch-net:
|
||||
```
|
||||
|
||||
After the clusters start, verify the names of each:
|
||||
|
||||
```json
|
||||
curl -XGET -u 'admin:admin' -k 'https://localhost:9200'
|
||||
{
|
||||
"cluster_name" : "opensearch-cluster1",
|
||||
...
|
||||
}
|
||||
|
||||
curl -XGET -u 'admin:admin' -k 'https://localhost:9250'
|
||||
{
|
||||
"cluster_name" : "opensearch-cluster2",
|
||||
...
|
||||
}
|
||||
```
|
||||
|
||||
Both clusters run on `localhost`, so the important identifier is the port number. In this case, use port 9200 (`opensearch-node1`) as the remote cluster, and port 9250 (`opensearch-node2`) as the coordinating cluster.
|
||||
|
||||
To get the IP address for the remote cluster, first identify its container ID:
|
||||
|
||||
```bash
|
||||
docker ps
|
||||
CONTAINER ID IMAGE PORTS NAMES
|
||||
6fe89ebc5a8e opensearchproject/opensearch:{{site.opensearch_version}} 0.0.0.0:9200->9200/tcp, 0.0.0.0:9600->9600/tcp, 9300/tcp opensearch-node1
|
||||
2da08b6c54d8 opensearchproject/opensearch:{{site.opensearch_version}} 9300/tcp, 0.0.0.0:9250->9200/tcp, 0.0.0.0:9700->9600/tcp opensearch-node2
|
||||
```
|
||||
|
||||
Then get that container's IP address:
|
||||
|
||||
```bash
|
||||
docker inspect --format='{% raw %}{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}{% endraw %}' 6fe89ebc5a8e
|
||||
172.31.0.3
|
||||
```
|
||||
|
||||
On the coordinating cluster, add the remote cluster name and the IP address (with port 9300) for each "seed node." In this case, you only have one seed node:
|
||||
|
||||
```json
|
||||
curl -k -XPUT -H 'Content-Type: application/json' -u 'admin:admin' 'https://localhost:9250/_cluster/settings' -d '
|
||||
{
|
||||
"persistent": {
|
||||
"search.remote": {
|
||||
"opensearch-cluster1": {
|
||||
"seeds": ["172.31.0.3:9300"]
|
||||
}
|
||||
}
|
||||
}
|
||||
}'
|
||||
```
|
||||
|
||||
On the remote cluster, index a document:
|
||||
|
||||
```bash
|
||||
curl -XPUT -k -H 'Content-Type: application/json' -u 'admin:admin' 'https://localhost:9200/books/_doc/1' -d '{"Dracula": "Bram Stoker"}'
|
||||
```
|
||||
|
||||
At this point, cross-cluster search works. You can test it using the `admin` user:
|
||||
|
||||
```bash
|
||||
curl -XGET -k -u 'admin:admin' 'https://localhost:9250/opensearch-cluster1:books/_search?pretty'
|
||||
{
|
||||
...
|
||||
"hits": [{
|
||||
"_index": "opensearch-cluster1:books",
|
||||
"_type": "_doc",
|
||||
"_id": "1",
|
||||
"_score": 1.0,
|
||||
"_source": {
|
||||
"Dracula": "Bram Stoker"
|
||||
}
|
||||
}]
|
||||
}
|
||||
```
|
||||
|
||||
To continue testing, create a new user on both clusters:
|
||||
|
||||
```bash
|
||||
curl -XPUT -k -u 'admin:admin' 'https://localhost:9200/_opensearch/_security/api/internalusers/booksuser' -H 'Content-Type: application/json' -d '{"password":"password"}'
|
||||
curl -XPUT -k -u 'admin:admin' 'https://localhost:9250/_opensearch/_security/api/internalusers/booksuser' -H 'Content-Type: application/json' -d '{"password":"password"}'
|
||||
```
|
||||
|
||||
Then run the same search as before with `booksuser`:
|
||||
|
||||
```json
|
||||
curl -XGET -k -u booksuser:password 'https://localhost:9250/opensearch-cluster1:books/_search?pretty'
|
||||
{
|
||||
"error" : {
|
||||
"root_cause" : [
|
||||
{
|
||||
"type" : "security_exception",
|
||||
"reason" : "no permissions for [indices:admin/shards/search_shards, indices:data/read/search] and User [name=booksuser, roles=[], requestedTenant=null]"
|
||||
}
|
||||
],
|
||||
"type" : "security_exception",
|
||||
"reason" : "no permissions for [indices:admin/shards/search_shards, indices:data/read/search] and User [name=booksuser, roles=[], requestedTenant=null]"
|
||||
},
|
||||
"status" : 403
|
||||
}
|
||||
```
|
||||
|
||||
Note the permissions error. On the remote cluster, create a role with the appropriate permissions, and map `booksuser` to that role:
|
||||
|
||||
```bash
|
||||
curl -XPUT -k -u 'admin:admin' -H 'Content-Type: application/json' 'https://localhost:9200/_opensearch/_security/api/roles/booksrole' -d '{"index_permissions":[{"index_patterns":["books"],"allowed_actions":["indices:admin/shards/search_shards","indices:data/read/search"]}]}'
|
||||
curl -XPUT -k -u 'admin:admin' -H 'Content-Type: application/json' 'https://localhost:9200/_opensearch/_security/api/rolesmapping/booksrole' -d '{"users" : ["booksuser"]}'
|
||||
```
|
||||
|
||||
Both clusters must have the user, but only the remote cluster needs the role and mapping; in this case, the coordinating cluster handles authentication (i.e. "Does this request include valid user credentials?"), and the remote cluster handles authorization (i.e. "Can this user access this data?").
|
||||
{: .tip }
|
||||
|
||||
Finally, repeat the search:
|
||||
|
||||
```bash
|
||||
curl -XGET -k -u booksuser:password 'https://localhost:9250/opensearch-cluster1:books/_search?pretty'
|
||||
{
|
||||
...
|
||||
"hits": [{
|
||||
"_index": "opensearch-cluster1:books",
|
||||
"_type": "_doc",
|
||||
"_id": "1",
|
||||
"_score": 1.0,
|
||||
"_source": {
|
||||
"Dracula": "Bram Stoker"
|
||||
}
|
||||
}]
|
||||
}
|
||||
```
|
|
@ -0,0 +1,54 @@
|
|||
---
|
||||
layout: default
|
||||
title: Default Action Groups
|
||||
parent: Access Control
|
||||
grand_parent: Security
|
||||
nav_order: 51
|
||||
---
|
||||
|
||||
# Default action groups
|
||||
|
||||
This page catalogs all default action groups. Often, the most coherent way to create new action groups is to use a combination of these default groups and [individual permissions](../permissions).
|
||||
|
||||
|
||||
## General
|
||||
|
||||
Name | Description
|
||||
:--- | :---
|
||||
unlimited | Grants complete access. Can be used at the cluster or index level. Equates to `"*"`.
|
||||
{% comment %}kibana_all_read | asdf
|
||||
kibana_all_write | asdf{% endcomment %}
|
||||
|
||||
|
||||
|
||||
## Cluster-level
|
||||
|
||||
Name | Description
|
||||
:---| :---
|
||||
cluster_all | Grants all cluster permissions. Equates to `cluster:*`.
|
||||
cluster_monitor | Grants all cluster monitoring permissions. Equates to `cluster:monitor/*`.
|
||||
cluster_composite_ops_ro | Grants read-only permissions to execute requests like `mget`, `msearch`, or `mtv`, plus permissions to query for aliases.
|
||||
cluster_composite_ops | Same as `cluster_composite_ops_ro`, but also grants `bulk` permissions and all aliases permissions.
|
||||
manage_snapshots | Grants permissions to manage snapshots and repositories.
|
||||
cluster_manage_pipelines | Grants permissions to manage ingest pipelines.
|
||||
cluster_manage_index_templates | Grants permissions to manage index templates.
|
||||
|
||||
|
||||
## Index-level
|
||||
|
||||
Name | Description
|
||||
:--- | :---
|
||||
indices_all | Grants all permissions on the index. Equates to `indices:*`.
|
||||
get | Grants permissions to use `get` and `mget` actions only.
|
||||
read | Grants read permissions such as search, get field mappings, `get`, and `mget`.
|
||||
write | Grants permissions to create and update documents within *existing indices*. To create new indices, see `create_index`.
|
||||
delete | Grants permissions to delete documents.
|
||||
crud | Combines the `read`, `write`, and `delete` action groups. Included in the `data_access` action group.
|
||||
search | Grants permissions to search documents. Includes `suggest`.
|
||||
suggest | Grants permissions to use the suggest API. Included in the `read` action group.
|
||||
create_index | Grants permissions to create indices and mappings.
|
||||
indices_monitor | Grants permissions to execute all index monitoring actions (e.g. recovery, segments info, index stats, and status).
|
||||
index | A more limited version of the `write` action group.
|
||||
data_access | Combines the `crud` action group with `indices:data/*`.
|
||||
manage_aliases | Grants permissions to manage aliases.
|
||||
manage | Grants all monitoring and administration permissions for indices.
|
|
@ -0,0 +1,127 @@
|
|||
---
|
||||
layout: default
|
||||
title: Document-Level Security
|
||||
parent: Access Control
|
||||
grand_parent: Security
|
||||
nav_order: 10
|
||||
---
|
||||
|
||||
# Document-level security
|
||||
|
||||
Document-level security lets you restrict a role to a subset of documents in an index. The easiest way to get started with document- and field-level security is to open OpenSearch Dashboards and choose **Security**. Then choose **Roles**, create a new role, and review the **Index permissions** section.
|
||||
|
||||
![Document- and field-level security screen in OpenSearch Dashboards](../../../images/security-dls.png)
|
||||
|
||||
|
||||
## Simple roles
|
||||
|
||||
Document-level security uses the OpenSearch query DSL to define which documents a role grants access to. In OpenSearch Dashboards, choose an index pattern and provide a query in the **Document level security** section:
|
||||
|
||||
```json
|
||||
{
|
||||
"bool": {
|
||||
"must": {
|
||||
"match": {
|
||||
"genres": "Comedy"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
This query specifies that for the role to have access to a document, its `genres` field must include `Comedy`.
|
||||
|
||||
A typical request to the `_search` API includes `{ "query": { ... } }` around the query, but in this case, you only need to specify the query itself.
|
||||
|
||||
In the REST API, you provide the query as a string, so you must escape your quotes. This role allows a user to read any document in any index with the field `public` set to `true`:
|
||||
|
||||
```json
|
||||
PUT _opensearch/_security/api/roles/public_data
|
||||
{
|
||||
"cluster_permissions": [
|
||||
"*"
|
||||
],
|
||||
"index_permissions": [{
|
||||
"index_patterns": [
|
||||
"pub*"
|
||||
],
|
||||
"dls": "{\"term\": { \"public\": true}}",
|
||||
"allowed_actions": [
|
||||
"read"
|
||||
]
|
||||
}]
|
||||
}
|
||||
```
|
||||
|
||||
These queries can be as complex as you want, but we recommend keeping them simple to minimize the performance impact that the document-level security feature has on the cluster.
|
||||
{: .warning }
|
||||
|
||||
|
||||
## Parameter substitution
|
||||
|
||||
A number of variables exist that you can use to enforce rules based on the properties of a user. For example, `${user.name}` is replaced with the name of the current user.
|
||||
|
||||
This rule allows a user to read any document where the username is a value of the `readable_by` field:
|
||||
|
||||
```json
|
||||
PUT _opensearch/_security/api/roles/user_data
|
||||
{
|
||||
"cluster_permissions": [
|
||||
"*"
|
||||
],
|
||||
"index_permissions": [{
|
||||
"index_patterns": [
|
||||
"pub*"
|
||||
],
|
||||
"dls": "{\"term\": { \"readable_by\": \"${user.name}\"}}",
|
||||
"allowed_actions": [
|
||||
"read"
|
||||
]
|
||||
}]
|
||||
}
|
||||
```
|
||||
|
||||
This table lists substitutions.
|
||||
|
||||
Term | Replaced with
|
||||
:--- | :---
|
||||
`${user.name}` | Username.
|
||||
`${user.roles}` | A comma-separated, quoted list of user roles.
|
||||
`${attr.<TYPE>.<NAME>}` | An attribute with name `<NAME>` defined for a user. `<TYPE>` is `internal`, `jwt`, `proxy`, or `ldap`.
|
||||
|
||||
|
||||
## Attribute-based security
|
||||
|
||||
You can use roles and parameter substitution with the `terms_set` query to enable attribute-based security.
|
||||
|
||||
> Note that the `security_attributes` field of the index needs to be of type `keyword`.
|
||||
|
||||
#### User definition
|
||||
|
||||
```json
|
||||
PUT _opensearch/_security/api/internalusers/user1
|
||||
{
|
||||
"password": "asdf",
|
||||
"backend_roles": ["abac"],
|
||||
"attributes": {
|
||||
"permissions": "\"att1\", \"att2\", \"att3\""
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
#### Role definition
|
||||
|
||||
```json
|
||||
PUT _opensearch/_security/api/roles/abac
|
||||
{
|
||||
"index_permissions": [{
|
||||
"index_patterns": [
|
||||
"*"
|
||||
],
|
||||
"dls": "{\"terms_set\": {\"security_attributes\": {\"terms\": [${attr.internal.permissions}], \"minimum_should_match_script\": {\"source\": \"doc['security_attributes'].length\"}}}}",
|
||||
"allowed_actions": [
|
||||
"read"
|
||||
]
|
||||
}]
|
||||
}
|
||||
```
|
|
@ -0,0 +1,125 @@
|
|||
---
|
||||
layout: default
|
||||
title: Field-Level Security
|
||||
parent: Access Control
|
||||
grand_parent: Security
|
||||
nav_order: 11
|
||||
---
|
||||
|
||||
# Field-level security
|
||||
|
||||
Field-level security lets you control which document fields a user can see. Just like [document-level security](../document-level-security/), you control access by index within a role.
|
||||
|
||||
The easiest way to get started with document- and field-level security is to open OpenSearch Dashboards and choose **Security**. Then choose **Roles**, create a new role, and review the **Index permissions** section.
|
||||
|
||||
---
|
||||
|
||||
#### Table of contents
|
||||
1. TOC
|
||||
{:toc}
|
||||
|
||||
|
||||
---
|
||||
|
||||
## Include or exclude fields
|
||||
|
||||
You have two options when you configure field-level security: include or exclude fields. If you include fields, users see *only* those fields when they retrieve a document. For example, if you include the `actors`, `title`, and `year` fields, a search result might look like this:
|
||||
|
||||
```json
|
||||
{
|
||||
"_index": "movies",
|
||||
"_type": "_doc",
|
||||
"_source": {
|
||||
"year": 2013,
|
||||
"title": "Rush",
|
||||
"actors": [
|
||||
"Daniel Brühl",
|
||||
"Chris Hemsworth",
|
||||
"Olivia Wilde"
|
||||
]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
If you exclude fields, users see everything *but* those fields when they retrieve a document. For example, if you exclude those same fields, the same search result might look like this:
|
||||
|
||||
```json
|
||||
{
|
||||
"_index": "movies",
|
||||
"_type": "_doc",
|
||||
"_source": {
|
||||
"directors": [
|
||||
"Ron Howard"
|
||||
],
|
||||
"plot": "A re-creation of the merciless 1970s rivalry between Formula One rivals James Hunt and Niki Lauda.",
|
||||
"genres": [
|
||||
"Action",
|
||||
"Biography",
|
||||
"Drama",
|
||||
"Sport"
|
||||
]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
You can achieve the same outcomes using inclusion or exclusion, so choose whichever makes sense for your use case. Mixing the two doesn't make sense and is not supported.
|
||||
|
||||
You can specify field-level security settings using OpenSearch Dashboards, `roles.yml`, and the REST API.
|
||||
|
||||
- To exclude fields in `roles.yml` or the REST API, add `~` before the field name.
|
||||
- Field names support wildcards (`*`).
|
||||
|
||||
Wildcards are especially useful for excluding *subfields*. For example, if you index a document that has a string (e.g. `{"title": "Thor"}`), OpenSearch creates a `title` field of type `text`, but it also creates a `title.keyword` subfield of type `keyword`. In this example, to prevent unauthorized access to data in the `title` field, you must also exclude the `title.keyword` subfield. Use `title*` to match all fields that begin with `title`.
|
||||
|
||||
|
||||
### OpenSearch Dashboards
|
||||
|
||||
1. Choose a role and **Add index permission**.
|
||||
1. Choose an index pattern.
|
||||
1. Under **Field level security**, use the drop-down to select your preferred option. Then specify one or more fields and press Enter.
|
||||
|
||||
|
||||
### roles.yml
|
||||
|
||||
```yml
|
||||
someonerole:
|
||||
cluster: []
|
||||
indices:
|
||||
movies:
|
||||
'*':
|
||||
- "READ"
|
||||
_fls_:
|
||||
- "~actors"
|
||||
- "~title"
|
||||
- "~year"
|
||||
```
|
||||
|
||||
### REST API
|
||||
|
||||
See [Create role](../api/#create-role).
|
||||
|
||||
|
||||
## Interaction with multiple roles
|
||||
|
||||
If you map a user to multiple roles, we recommend that those roles use either include *or* exclude statements for each index. The security plugin evaluates field-level security settings using the `AND` operator, so combining include and exclude statements can lead to neither behavior working properly.
|
||||
|
||||
For example, in the `movies` index, if you include `actors`, `title`, and `year` in one role, exclude `actors`, `title`, and `genres` in another role, and then map both roles to the same user, a search result might look like this:
|
||||
|
||||
```json
|
||||
{
|
||||
"_index": "movies",
|
||||
"_type": "_doc",
|
||||
"_source": {
|
||||
"year": 2013,
|
||||
"directors": [
|
||||
"Ron Howard"
|
||||
],
|
||||
"plot": "A re-creation of the merciless 1970s rivalry between Formula One rivals James Hunt and Niki Lauda."
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
## Interaction with document-level security
|
||||
|
||||
[Document-level security](../document-level-security/) relies on OpenSearch queries, which means that all fields in the query must be visible in order for it to work properly. If you use field-level security in conjunction with document-level security, make sure you don't restrict access to the fields that document-level security uses.
|
|
@ -0,0 +1,126 @@
|
|||
---
|
||||
layout: default
|
||||
title: Field Masking
|
||||
parent: Access Control
|
||||
grand_parent: Security
|
||||
nav_order: 12
|
||||
---
|
||||
|
||||
# Field masking
|
||||
|
||||
If you don't want to remove fields from a document using [field-level security](../field-level-security/), you can mask their values. Currently, field masking is only available for string-based fields and replaces the field's value with a cryptographic hash.
|
||||
|
||||
Field masking works alongside field-level security on the same per-role, per-index basis. You can allow certain roles to see sensitive fields in plain text and mask them for others. A search result with a masked field might look like this:
|
||||
|
||||
```json
|
||||
{
|
||||
"_index": "movies",
|
||||
"_type": "_doc",
|
||||
"_source": {
|
||||
"year": 2013,
|
||||
"directors": [
|
||||
"Ron Howard"
|
||||
],
|
||||
"title": "ca998e768dd2e6cdd84c77015feb29975f9f498a472743f159bec6f1f1db109e"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
## Set the salt
|
||||
|
||||
You set the salt (a random string used to hash your data) in `opensearch.yml`:
|
||||
|
||||
```yml
|
||||
opensearch_security.compliance.salt: abcdefghijklmnopqrstuvqxyz1234567890
|
||||
```
|
||||
|
||||
Property | Description
|
||||
:--- | :---
|
||||
`opensearch_security.compliance.salt` | The salt to use when generating the hash value. Must be at least 32 characters. Only ASCII characters are allowed. Optional.
|
||||
|
||||
Setting the salt is optional, but we highly recommend it.
|
||||
|
||||
|
||||
## Configure field masking
|
||||
|
||||
You configure field masking using OpenSearch Dashboards, `roles.yml`, or the REST API.
|
||||
|
||||
### OpenSearch Dashboards
|
||||
|
||||
1. Choose a role.
|
||||
1. Choose an index permission.
|
||||
1. For **Anonymization**, specify one or more fields and press Enter.
|
||||
|
||||
|
||||
### roles.yml
|
||||
|
||||
```yml
|
||||
someonerole:
|
||||
cluster: []
|
||||
indices:
|
||||
movies:
|
||||
_masked_fields_:
|
||||
- "title"
|
||||
- "genres"
|
||||
'*':
|
||||
- "READ"
|
||||
```
|
||||
|
||||
|
||||
### REST API
|
||||
|
||||
See [Create role](../api/#create-role).
|
||||
|
||||
|
||||
## (Advanced) Use an alternative hash algorithm
|
||||
|
||||
By default, the security plugin uses the BLAKE2b algorithm, but you can use any hashing algorithm that your JVM provides. This list typically includes MD5, SHA-1, SHA-384, and SHA-512.
|
||||
|
||||
To specify a different algorithm, add it after the masked field:
|
||||
|
||||
```yml
|
||||
someonerole:
|
||||
cluster: []
|
||||
indices:
|
||||
movies:
|
||||
_masked_fields_:
|
||||
- "title::SHA-512"
|
||||
- "genres"
|
||||
'*':
|
||||
- "READ"
|
||||
```
|
||||
|
||||
|
||||
## (Advanced) Pattern-based field masking
|
||||
|
||||
Rather than creating a hash, you can use one or more regular expressions and replacement strings to mask a field. The syntax is `<field>::/<regular-expression>/::<replacement-string>`. If you use multiple regular expressions, the results are passed from left to right, like piping in a shell:
|
||||
|
||||
```yml
|
||||
hr_employee:
|
||||
index_permissions:
|
||||
- index_patterns:
|
||||
- 'humanresources'
|
||||
allowed_actions:
|
||||
- ...
|
||||
masked_fields:
|
||||
- 'lastname::/.*/::*'
|
||||
- '*ip_source::/[0-9]{1,3}$/::XXX::/^[0-9]{1,3}/::***'
|
||||
someonerole:
|
||||
cluster: []
|
||||
indices:
|
||||
movies:
|
||||
_masked_fields_:
|
||||
- "title::/./::*"
|
||||
- "genres::/^[a-zA-Z]{1,3}/::XXX::/[a-zA-Z]{1,3}$/::YYY"
|
||||
'*':
|
||||
- "READ"
|
||||
|
||||
```
|
||||
|
||||
The `title` statement changes each character in the field to `*`, so you can still discern the length of the masked string. The `genres` statement changes the first three characters of the string to `XXX` and the last three characters to `YYY`.
|
||||
|
||||
|
||||
## Effect on audit logging
|
||||
|
||||
The read history feature lets you track read access to sensitive fields in your documents. For example, you might track access to the email field of your customer records. Access to masked fields is excluded from read history, because the user only saw the hash value, not the clear text value of the field.
|
|
@ -0,0 +1,49 @@
|
|||
---
|
||||
layout: default
|
||||
title: User Impersonation
|
||||
parent: Access Control
|
||||
grand_parent: Security
|
||||
nav_order: 20
|
||||
---
|
||||
|
||||
# User impersonation
|
||||
|
||||
User impersonation allows specially privileged users to act as another user without knowledge of or access to the impersonated user's credentials.
|
||||
|
||||
Impersonation can be useful for testing and troubleshooting, or for allowing system services to safely act as a user.
|
||||
|
||||
Impersonation can occur on either the REST interface or at the transport layer.
|
||||
|
||||
|
||||
## REST interface
|
||||
|
||||
To allow one user to impersonate another, add the following to `opensearch.yml`:
|
||||
|
||||
```yml
|
||||
opensearch_security.authcz.rest_impersonation_user:
|
||||
<AUTHENTICATED_USER>:
|
||||
- <IMPERSONATED_USER_1>
|
||||
- <IMPERSONATED_USER_2>
|
||||
```
|
||||
|
||||
The impersonated user field supports wildcards. Setting it to `*` allows `AUTHENTICATED_USER` to impersonate any user.
|
||||
|
||||
|
||||
## Transport interface
|
||||
|
||||
In a similar fashion, add the following to enable transport layer impersonation:
|
||||
|
||||
```yml
|
||||
opensearch_security.authcz.impersonation_dn:
|
||||
"CN=spock,OU=client,O=client,L=Test,C=DE":
|
||||
- worf
|
||||
```
|
||||
|
||||
|
||||
## Impersonating users
|
||||
|
||||
To impersonate another user, submit a request to the system with the HTTP header `opensearch_security_impersonate_as` set to the name of the user to be impersonated. A good test is to make a GET request to the `_opensearch/_security/authinfo` URI:
|
||||
|
||||
```bash
|
||||
curl -XGET -u 'admin:admin' -k -H "opensearch_security_impersonate_as: user_1" https://localhost:9200/_opensearch/_security/authinfo?pretty
|
||||
```
|
|
@ -0,0 +1,28 @@
|
|||
---
|
||||
layout: default
|
||||
title: Access Control
|
||||
nav_order: 10
|
||||
parent: Security
|
||||
has_children: true
|
||||
has_toc: false
|
||||
---
|
||||
|
||||
# Access control
|
||||
|
||||
After you [configure the security plugin](../configuration/) to use your own certificates and preferred authentication backend, you can start adding users, creating roles, and mapping roles to users.
|
||||
|
||||
This section of the documentation covers what a user is allowed to see and do after successfully authenticating.
|
||||
|
||||
|
||||
## Concepts
|
||||
|
||||
Term | Description
|
||||
:--- | :---
|
||||
Permission | An individual action, such as creating an index (e.g. `indices:admin/create`). For a complete list, see [Permissions](permissions/).
|
||||
Action group | A set of permissions. For example, the predefined `SEARCH` action group authorizes roles to use the `_search` and `_msearch` APIs.
|
||||
Role | Security roles define the scope of a permission or action group: cluster, index, document, or field. For example, a role named `delivery_analyst` might have no cluster permissions, the `READ` action group for all indices that match the `delivery-data-*` pattern, access to all document types within those indices, and access to all fields except `delivery_driver_name`.
|
||||
Backend role | (Optional) Arbitrary strings that you specify *or* that come from an external authentication system (e.g. LDAP/Active Directory). Backend roles can help simplify the role mapping process. Rather than mapping a role to 100 individual users, you can map the role to a single backend role that all 100 users share.
|
||||
User | Users make requests to OpenSearch clusters. A user has credentials (e.g. a username and password), zero or more backend roles, and zero or more custom attributes.
|
||||
Role mapping | Users assume roles after they successfully authenticate. Role mappings, well, map roles to users (or backend roles). For example, a mapping of `kibana_user` (role) to `jdoe` (user) means that John Doe gains all the permissions of `kibana_user` after authenticating. Likewise, a mapping of `all_access` (role) to `admin` (backend role) means that any user with the backend role of `admin` gains all the permissions of `all_access` after authenticating. You can map each role to many users and/or backend roles.
|
||||
|
||||
The security plugin comes with a number of [predefined action groups](default-action-groups/), roles, mappings, and users. These entities serve as sensible defaults and are good examples of how to use the plugin.
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue