OpenSearch/dev-tools/create_bwc_indexes.py

468 lines
18 KiB
Python
Raw Normal View History

# ELASTICSEARCH CONFIDENTIAL
# __________________
#
# [2014] Elasticsearch Incorporated. All Rights Reserved.
#
# NOTICE: All information contained herein is, and remains
# the property of Elasticsearch Incorporated and its suppliers,
# if any. The intellectual and technical concepts contained
# herein are proprietary to Elasticsearch Incorporated
# and its suppliers and may be covered by U.S. and Foreign Patents,
# patents in process, and are protected by trade secret or copyright law.
# Dissemination of this information or reproduction of this material
# is strictly forbidden unless prior written permission is obtained
# from Elasticsearch Incorporated.
# Creates indices with old versions of elasticsearch. These indices are used by x-pack plugins like security
# to test if the import of metadata that is stored in elasticsearch indexes works correctly.
# This tool will start a node on port 9200/9300. If a node is already running on that port then the script will fail.
# Currently this script can only deal with versions >=2.0.0 and < 5.0. Needs more work for versions before or after.
#
# Run from x-plugins root directory like so:
# python3 ./dev-tools/create_bwc_indexes.py 2.3.4
# You can get any particular version of elasticsearch with:
# python3 ../elasticsearch/dev-tools/get-bwc-version.py 2.3.4
#
#
import argparse
import glob
import json
import logging
import os
import random
import shutil
import subprocess
import sys
import tempfile
import time
import requests
import socket
import signal
from cmd import Cmd
DEFAULT_TRANSPORT_TCP_PORT = 9300
DEFAULT_HTTP_TCP_PORT = 9200
if sys.version_info[0] < 3:
print('%s must use python 3.x (for the ES python client)' % sys.argv[0])
try:
from elasticsearch import Elasticsearch
from elasticsearch.exceptions import ConnectionError
from elasticsearch.exceptions import TransportError
from elasticsearch.exceptions import NotFoundError
from elasticsearch.client import IndicesClient
except ImportError as e:
print('Can\'t import elasticsearch please install `sudo pip3 install elasticsearch`')
sys.exit(1)
def start_node(version, release_dir, data_dir):
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
result = sock.connect_ex(('localhost',DEFAULT_HTTP_TCP_PORT))
if result == 0:
raise Exception('Elasticsearch instance already running on port ' + str(DEFAULT_HTTP_TCP_PORT))
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
result = sock.connect_ex(('localhost',DEFAULT_TRANSPORT_TCP_PORT))
if result == 0:
raise Exception('Elasticsearch instance already running on port ' + str(DEFAULT_TRANSPORT_TCP_PORT))
logging.info('Starting node from %s on port %s/%s, data_dir %s' % (release_dir, DEFAULT_TRANSPORT_TCP_PORT
, DEFAULT_HTTP_TCP_PORT, data_dir))
cluster_name = 'bwc_index_' + version
cmd = [
os.path.join(release_dir, 'bin/elasticsearch'),
'-Des.path.data=%s' %(data_dir),
'-Des.path.logs=logs',
'-Des.cluster.name=%s' %(cluster_name),
'-Des.network.host=localhost',
'-Des.transport.tcp.port=%s' %(DEFAULT_TRANSPORT_TCP_PORT), # not sure if we need to customize ports
'-Des.http.port=%s' %(DEFAULT_HTTP_TCP_PORT)
]
return subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
def install_plugin(version, release_dir, plugin_name):
args = [plugin_name]
if parse_version(version) >= parse_version('2.2.0'):
args = [plugin_name, '--batch']
run_plugin(version, release_dir, 'install', args)
def remove_plugin(version, release_dir, plugin_name):
run_plugin(version, release_dir, 'remove', [plugin_name])
def run_plugin(version, release_dir, plugin_cmd, args):
cmd = [os.path.join(release_dir, 'bin/plugin'), plugin_cmd] + args
subprocess.check_call(cmd)
def create_client():
logging.info('Waiting for node to startup')
for _ in range(0, 30):
try:
client = Elasticsearch([{'host': 'localhost', 'port': 9200, 'http_auth':'es_admin:0123456789'}])
health = client.cluster.health(wait_for_nodes=1)
return client
except ConnectionError:
logging.info('Not started yet...')
time.sleep(1)
assert False, 'Timed out waiting for node for %s seconds' % timeout
# this adds a user bwc_test_role/9876543210, a role bwc_test_role and some documents the user has or has not access to
def generate_security_index(client, version):
logging.info('Add a group')
# don't know how to use python client with shield so use curl instead
# add a user
body = {
"password" : "9876543210",
"roles" : [ "bwc_test_role" ]
}
while True:
response = requests.put('http://localhost:9200/_shield/user/bwc_test_user', auth=('es_admin', '0123456789'), data=json.dumps(body))
logging.info('put user reponse: ' + response.text)
if response.status_code == 200:
break
else:
if 'service has not been started' in response.text:
continue
raise Exception('PUT http://localhost:9200/_shield/role/bwc_test_role did not succeed!')
# add a role
body = {
"cluster": ["all"],
"indices": [
{
"names": [ "index1", "index2" ],
"privileges": ["all"],
"fields": [ "title", "body" ],
"query": "{\"match\": {\"title\": \"foo\"}}"
}
],
"run_as": [ "other_user" ]
}
# order of params in put role request is important, see https://github.com/elastic/x-plugins/issues/2606
response = requests.put('http://localhost:9200/_shield/role/bwc_test_role', auth=('es_admin', '0123456789')
, data=json.dumps(body, sort_keys=True))
logging.info('put user reponse: ' + response.text)
if (response.status_code != 200) :
raise Exception('PUT http://localhost:9200/_shield/role/bwc_test_role did not succeed!')
client.index(index="index1", doc_type="doc", body={"title": "foo",
"body": "bwc_test_user should be able to see this field",
"secured_body": "bwc_test_user should not be able to see this field"})
client.index(index="index1", doc_type="doc", body={"title": "bwc_test_user should not be able to see this document"})
client.index(index="index2", doc_type="doc", body={"title": "foo",
"body": "bwc_test_user should be able to see this field",
"secured_body": "bwc_test_user should not be able to see this field"})
client.index(index="index2", doc_type="doc", body={"title": "bwc_test_user should not be able to see this document"})
client.index(index="index3", doc_type="doc", body={"title": "bwc_test_user should not see this index"})
logging.info('Waiting for yellow')
health = client.cluster.health(wait_for_status='yellow', wait_for_relocating_shards=0, index='.security')
assert health['timed_out'] == False, 'cluster health timed out %s' % health
# this adds a couple of watches and waits for the the watch_history to accumulate some results
def generate_watcher_index(client, version):
logging.info('Adding a watch')
body = {
"trigger" : {
"schedule": {
"interval": "1s"
}
},
"input" : {
"search" : {
"timeout": "100s",
"request" : {
"indices" : [ ".watches" ],
"body" : {
"query" : { "match_all" : {}},
"size": 1
},
}
}
},
"condition" : {
"always" : {}
},
"throttle_period": "1s",
"actions" : {
"index_payload" : {
"transform" : {
"search" : {
"request" : {
"body" : { "size": 1, "query" : { "match_all" : {} }}
},
"timeout": "100s"
}
},
"index" : {
"index" : "bwc_watch_index",
"doc_type" : "bwc_watch_type",
"timeout": "100s"
}
}
}
}
response = requests.put('http://localhost:9200/_watcher/watch/bwc_watch', auth=('es_admin', '0123456789'), data=json.dumps(body))
logging.info('PUT watch response: ' + response.text)
if (response.status_code != 201) :
raise Exception('PUT http://localhost:9200/_watcher/watch/bwc_watch did not succeed!')
logging.info('Adding a watch with "fun" throttle periods')
body = {
"trigger" : {
"schedule": {
"interval": "1s"
}
},
"condition" : {
"never" : {}
},
"throttle_period": "100s",
"actions" : {
"index_payload" : {
"throttle_period": "100s",
"transform" : {
"search" : {
"request" : {
"body" : { "size": 1, "query" : { "match_all" : {} }}
}
}
},
"index" : {
"index" : "bwc_watch_index",
"doc_type" : "bwc_watch_type"
}
}
}
}
response = requests.put('http://localhost:9200/_watcher/watch/bwc_throttle_period', auth=('es_admin', '0123456789'), data=json.dumps(body))
logging.info('PUT watch response: ' + response.text)
if (response.status_code != 201) :
raise Exception('PUT http://localhost:9200/_watcher/watch/bwc_throttle_period did not succeed!')
if parse_version(version) < parse_version('2.3.0'):
logging.info('Skipping watch with a funny read timeout because email attachement is not supported by this version')
else:
logging.info('Adding a watch with a funny read timeout')
body = {
"trigger" : {
"schedule": {
"interval": "100s"
}
},
"condition": {
"never": {}
},
"actions": {
"work": {
"email": {
"to": "email@domain.com",
"subject": "Test Kibana PDF report",
"attachments": {
"test_report.pdf": {
"http": {
"content_type": "application/pdf",
"request": {
"read_timeout": "100s",
"scheme": "https",
"host": "example.com",
"path":"{{ctx.metadata.report_url}}",
"port": 8443,
"auth": {
"basic": {
"username": "Aladdin",
"password": "open sesame"
}
}
}
}
}
}
}
}
}
}
response = requests.put('http://localhost:9200/_watcher/watch/bwc_funny_timeout', auth=('es_admin', '0123456789'), data=json.dumps(body))
logging.info('PUT watch response: ' + response.text)
if (response.status_code != 201) :
raise Exception('PUT http://localhost:9200/_watcher/watch/bwc_funny_timeout did not succeed!')
# wait to accumulate some watches
logging.info('Waiting for watch results index to fill up...')
wait_for_search(10, lambda: client.search(index="bwc_watch_index", body={"query": {"match_all": {}}}))
health = client.cluster.health(wait_for_status='yellow', wait_for_relocating_shards=0, index='.watches')
assert health['timed_out'] == False, 'cluster health timed out %s' % health
health = client.cluster.health(wait_for_status='yellow', wait_for_relocating_shards=0, index='.watch_history*')
assert health['timed_out'] == False, 'cluster health timed out %s' % health
health = client.cluster.health(wait_for_status='yellow', wait_for_relocating_shards=0, index='bwc_watch_index')
assert health['timed_out'] == False, 'cluster health timed out %s' % health
def wait_for_monitoring_index_to_fill(client, version):
logging.info('Waiting for marvel to index the cluster_info...')
wait_for_search(1, lambda: client.search(index=".marvel-es-*", doc_type="cluster_info", body={"query": {"match_all": {}}}))
if parse_version(version) >= parse_version('2.1.0'):
logging.info('Waiting for marvel to index the node information...')
wait_for_search(1, lambda: client.search(index=".marvel-es-*", doc_type="node", body={"query": {"match_all": {}}}))
logging.info('Waiting for marvel index to get enough index_stats...')
wait_for_search(10, lambda: client.search(index=".marvel-es-*", doc_type="index_stats", body={"query": {"match_all": {}}}))
logging.info('Waiting for marvel index to get enough shards...')
wait_for_search(10, lambda: client.search(index=".marvel-es-*", doc_type="shards", body={"query": {"match_all": {}}}))
logging.info('Waiting for marvel index to get enough indices_stats...')
wait_for_search(3, lambda: client.search(index=".marvel-es-*", doc_type="indices_stats", body={"query": {"match_all": {}}}))
logging.info('Waiting for marvel index to get enough node_stats...')
wait_for_search(3, lambda: client.search(index=".marvel-es-*", doc_type="node_stats", body={"query": {"match_all": {}}}))
logging.info('Waiting for marvel index to get enough cluster_state...')
wait_for_search(3, lambda: client.search(index=".marvel-es-*", doc_type="cluster_state", body={"query": {"match_all": {}}}))
def wait_for_search(required_count, searcher):
for attempt in range(1, 31):
try:
response = searcher()
logging.info('(' + str(attempt) + ') Got ' + str(response['hits']['total']) + ' hits and want ' + str(required_count) + '...')
if response['hits']['total'] >= required_count:
return
except NotFoundError:
logging.info('(' + str(attempt) + ') Not found, retrying')
time.sleep(1)
logger.error("Ran out of retries")
raise "Ran out of retries"
def compress_index(version, tmp_dir, output_dir):
compress(tmp_dir, output_dir, 'x-pack-%s.zip' % version, 'data')
def compress(tmp_dir, output_dir, zipfile, directory):
abs_output_dir = os.path.abspath(output_dir)
zipfile = os.path.join(abs_output_dir, zipfile)
if os.path.exists(zipfile):
os.remove(zipfile)
logging.info('Compressing index into %s, tmpDir %s', zipfile, tmp_dir)
olddir = os.getcwd()
os.chdir(tmp_dir)
subprocess.check_call('zip -r %s %s' % (zipfile, directory), shell=True)
os.chdir(olddir)
def parse_config():
parser = argparse.ArgumentParser(description='Builds an elasticsearch index for backwards compatibility tests')
required = parser.add_mutually_exclusive_group(required=True)
required.add_argument('versions', metavar='X.Y.Z', nargs='*', default=[],
help='The elasticsearch version to build an index for')
required.add_argument('--all', action='store_true', default=False,
help='Recreate all existing backwards compatibility indexes')
parser.add_argument('--releases-dir', '-d', default='backwards', metavar='DIR',
help='The directory containing elasticsearch releases')
parser.add_argument('--output-dir', '-o', default='elasticsearch/src/test/resources/indices/bwc/',
help='The directory to write the zipped index into')
cfg = parser.parse_args()
if not os.path.exists(cfg.output_dir):
parser.error('Output directory does not exist: %s' % cfg.output_dir)
if not cfg.versions:
# --all
for bwc_index in glob.glob(os.path.join(cfg.output_dir, 'x-pack-*.zip')):
version = os.path.basename(bwc_index)[len('x-pack-'):-len('.zip')]
cfg.versions.append(version)
return cfg
def shutdown_node(node):
logging.info('Shutting down node with pid %d', node.pid)
node.terminate()
node.wait()
def parse_version(version):
import re
splitted = re.split('[.-]', version)
if len(splitted) == 3:
splitted = splitted + ['GA']
splitted = [s.lower() for s in splitted]
assert len(splitted) == 4;
return splitted
def run(command, env_vars=None):
if env_vars:
for key, value in env_vars.items():
os.putenv(key, value)
logging.info('*** Running: %s%s%s' % (COLOR_OK, command, COLOR_END))
if os.system(command):
raise RuntimeError(' FAILED: %s' % (command))
assert parse_version('1.2.3') < parse_version('2.1.0')
assert parse_version('1.2.3') < parse_version('1.2.4')
assert parse_version('1.1.0') < parse_version('1.2.0')
# console colors
COLOR_OK = '\033[92m'
COLOR_END = '\033[0m'
COLOR_FAIL = '\033[91m'
def main():
logging.basicConfig(format='[%(levelname)s] [%(asctime)s] %(message)s', level=logging.INFO,
datefmt='%Y-%m-%d %I:%M:%S %p')
logging.getLogger('elasticsearch').setLevel(logging.ERROR)
logging.getLogger('urllib3').setLevel(logging.WARN)
cfg = parse_config()
for version in cfg.versions:
logging.info('--> Creating x-pack index for %s' % version)
# setup for starting nodes
release_dir = os.path.join(cfg.releases_dir, 'elasticsearch-%s' % version)
if not os.path.exists(release_dir):
raise RuntimeError('ES version %s does not exist in %s' % (version, cfg.releases_dir))
tmp_dir = tempfile.mkdtemp()
data_dir = os.path.join(tmp_dir, 'data')
logging.info('Temp data dir: %s' % data_dir)
node = None
try:
# Remove old plugins just in case any are around
remove_plugin(version, release_dir, 'marvel-agent')
remove_plugin(version, release_dir, 'watcher')
remove_plugin(version, release_dir, 'shield')
remove_plugin(version, release_dir, 'license')
# Remove the shield config too before fresh install
run('rm -rf %s' %(os.path.join(release_dir, 'config/shield')))
# Install the plugins we'll need
install_plugin(version, release_dir, 'license')
install_plugin(version, release_dir, 'shield')
install_plugin(version, release_dir, 'watcher')
install_plugin(version, release_dir, 'marvel-agent')
# create admin
run('%s useradd es_admin -r admin -p 0123456789' %(os.path.join(release_dir, 'bin/shield/esusers')))
node = start_node(version, release_dir, data_dir)
# create a client that authenticates as es_admin
client = create_client()
if parse_version(version) < parse_version('2.3.0'):
logging.info('Version is ' + version + ' but shield supports native realm oly from 2.3.0 on. Nothing to do for Shield.')
else:
generate_security_index(client, version)
generate_watcher_index(client, version)
wait_for_monitoring_index_to_fill(client, version)
shutdown_node(node)
node = None
compress_index(version, tmp_dir, cfg.output_dir)
finally:
if node is not None:
# This only happens if we've hit an exception:
shutdown_node(node)
shutil.rmtree(tmp_dir)
if __name__ == '__main__':
try:
main()
except KeyboardInterrupt:
logging.info('Caught keyboard interrupt, exiting...')
sys.exit(signal.SIGTERM) # exit code