Begin replacing static index tests with full restart tests (#24846)

These tests spin up two nodes of an older version of Elasticsearch,
create some stuff, shut down the nodes, start the current version,
and verify that the created stuff works.
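
Under the hood each of these is an `ESRestTestCase` that runs twice against
the same data directories, branching on the `tests.is_old_cluster` system
property that the build sets. Here is a minimal sketch of the pattern,
condensed from the `testSingleDoc` test in the `FullClusterRestartIT` added
below (the class and index names here are illustrative):

```java
import org.apache.http.entity.ContentType;
import org.apache.http.entity.StringEntity;
import org.apache.http.util.EntityUtils;
import org.elasticsearch.common.Booleans;
import org.elasticsearch.test.rest.ESRestTestCase;

import java.io.IOException;

import static java.util.Collections.singletonMap;
import static org.hamcrest.Matchers.containsString;

public class RestartSketchIT extends ESRestTestCase {
    // true on the first run against the old-version cluster, false on the
    // rerun against the current-version cluster using the same data dirs
    private final boolean runningAgainstOldCluster =
            Booleans.parseBoolean(System.getProperty("tests.is_old_cluster"));

    public void testDocSurvivesRestart() throws IOException {
        String doc = "{\"test\": \"test\"}";
        if (runningAgainstOldCluster) {
            // phase one: create some stuff on the old cluster
            client().performRequest("PUT", "/restart_sketch/doc/1", singletonMap("refresh", "true"),
                    new StringEntity(doc, ContentType.APPLICATION_JSON));
        }
        // both phases: verify the stuff is still there; on the second run it
        // has survived a full cluster restart into the current version
        assertThat(EntityUtils.toString(
                client().performRequest("GET", "/restart_sketch/doc/1").getEntity()),
                containsString(doc));
    }
}
```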

You can run `gradle qa:full-cluster-restart:check` to run these
tests against the head of the previous branch of Elasticsearch
(5.x for master, 5.4 for 5.x, etc.) or you can run
`gradle qa:full-cluster-restart:bwcTest` to run these tests against
all "index compatible" versions, one after the other. For master
this is every released 5.x.y version *and* the tip of the
5.x branch.
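
Concretely (the last invocation below is inferred from the per-version
`v${version}#bwcTest` tasks that the new build file creates, so treat the
exact task name as illustrative):

```sh
# head of the previous branch only; check hangs off integTest, which
# runs bwcTest for the most recent index compatible version
gradle qa:full-cluster-restart:check

# every index compatible version, one after the other
gradle qa:full-cluster-restart:bwcTest

# a single version, via the per-version tasks the build file creates
gradle "qa:full-cluster-restart:v5.4.0#bwcTest"
```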

I'd love to add more to these tests in the future, but these
currently just cover the functionality of the `create_bwc_index.py`
script and start to cover the assertions in the
`OldIndexBackwardsCompatibilityIT` test.
Nik Everett, 2017-05-26 14:07:48 -04:00 (committed by GitHub)
parent bb63577f14
commit e072cc7770
8 changed files with 375 additions and 68 deletions


@@ -46,11 +46,11 @@ class ClusterConfiguration {
int transportPort = 0
/**
* An override of the data directory. This may only be used with a single node.
* The value is lazily evaluated at runtime as a String path.
* An override of the data directory. Input is the node number and output
* is the override data directory.
*/
@Input
Object dataDir = null
Closure<String> dataDir = null
/** Optional override of the cluster name. */
@Input
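
The full-cluster-restart build script added below is the first user of this
closure form; it hands each node of the upgraded cluster the data directory
of the matching node from the old cluster:

```groovy
// from the upgraded cluster configuration later in this commit: node N of
// the new cluster reuses node N's data directory from the old cluster, so
// the restart sees the old on-disk state
dataDir = { nodeNum -> oldClusterTest.nodes[nodeNum].dataDir }
```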


@@ -111,10 +111,7 @@ class NodeInfo {
homeDir = homeDir(baseDir, config.distribution, nodeVersion)
confDir = confDir(baseDir, config.distribution, nodeVersion)
if (config.dataDir != null) {
if (config.numNodes != 1) {
throw new IllegalArgumentException("Cannot set data dir for integ test with more than one node")
}
dataDir = config.dataDir
dataDir = "${config.dataDir(nodeNum)}"
} else {
dataDir = new File(homeDir, "data")
}


@@ -75,7 +75,7 @@ public class Version implements Comparable<Version> {
public static final int V_5_4_0_ID = 5040099;
public static final Version V_5_4_0 = new Version(V_5_4_0_ID, org.apache.lucene.util.Version.LUCENE_6_5_0);
public static final int V_5_5_0_ID_UNRELEASED = 5050099;
public static final Version V_5_5_0_UNRELEASED = new Version(V_5_5_0_ID_UNRELEASED, org.apache.lucene.util.Version.LUCENE_6_5_0);
public static final Version V_5_5_0_UNRELEASED = new Version(V_5_5_0_ID_UNRELEASED, org.apache.lucene.util.Version.LUCENE_6_5_1);
public static final int V_6_0_0_alpha1_ID_UNRELEASED = 6000001;
public static final Version V_6_0_0_alpha1_UNRELEASED =
new Version(V_6_0_0_alpha1_ID_UNRELEASED, org.apache.lucene.util.Version.LUCENE_7_0_0);


@@ -24,12 +24,6 @@ import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import org.elasticsearch.Version;
import org.elasticsearch.VersionTests;
import org.elasticsearch.action.admin.indices.get.GetIndexResponse;
import org.elasticsearch.action.admin.indices.recovery.RecoveryResponse;
import org.elasticsearch.action.admin.indices.segments.IndexSegments;
import org.elasticsearch.action.admin.indices.segments.IndexShardSegments;
import org.elasticsearch.action.admin.indices.segments.IndicesSegmentResponse;
import org.elasticsearch.action.admin.indices.segments.ShardSegments;
import org.elasticsearch.action.admin.indices.settings.get.GetSettingsResponse;
import org.elasticsearch.action.get.GetResponse;
import org.elasticsearch.action.search.SearchRequestBuilder;
@@ -38,7 +32,6 @@ import org.elasticsearch.action.support.WriteRequest.RefreshPolicy;
import org.elasticsearch.client.Requests;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.cluster.metadata.MetaData;
import org.elasticsearch.cluster.routing.RecoverySource;
import org.elasticsearch.common.bytes.BytesArray;
import org.elasticsearch.common.io.FileSystemUtils;
import org.elasticsearch.common.settings.Settings;
@@ -51,9 +44,7 @@ import org.elasticsearch.env.Environment;
import org.elasticsearch.env.NodeEnvironment;
import org.elasticsearch.gateway.MetaDataStateFormat;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.engine.Segment;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.indices.recovery.RecoveryState;
import org.elasticsearch.node.Node;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.search.SearchHit;
@@ -238,7 +229,6 @@ public class OldIndexBackwardsCompatibilityIT extends ESIntegTestCase {
// node startup
upgradeIndexFolder();
importIndex(indexName);
assertIndexSanity(indexName, version);
assertBasicSearchWorks(indexName);
assertAllSearchWorks(indexName);
assertBasicAggregationWorks(indexName);
@@ -251,54 +241,6 @@ public class OldIndexBackwardsCompatibilityIT extends ESIntegTestCase {
unloadIndex(indexName);
}
void assertIndexSanity(String indexName, Version indexCreated) {
GetIndexResponse getIndexResponse = client().admin().indices().prepareGetIndex().addIndices(indexName).get();
assertEquals(1, getIndexResponse.indices().length);
assertEquals(indexName, getIndexResponse.indices()[0]);
Version actualVersionCreated = Version.indexCreated(getIndexResponse.getSettings().get(indexName));
assertEquals(indexCreated, actualVersionCreated);
ensureYellow(indexName);
RecoveryResponse recoveryResponse = client().admin().indices().prepareRecoveries(indexName)
.setDetailed(true).setActiveOnly(false).get();
boolean foundTranslog = false;
for (List<RecoveryState> states : recoveryResponse.shardRecoveryStates().values()) {
for (RecoveryState state : states) {
if (state.getStage() == RecoveryState.Stage.DONE
&& state.getPrimary()
&& state.getRecoverySource().getType() == RecoverySource.Type.EXISTING_STORE) {
assertFalse("more than one primary recoverd?", foundTranslog);
assertNotEquals(0, state.getTranslog().recoveredOperations());
foundTranslog = true;
}
}
}
assertTrue("expected translog but nothing was recovered", foundTranslog);
IndicesSegmentResponse segmentsResponse = client().admin().indices().prepareSegments(indexName).get();
IndexSegments segments = segmentsResponse.getIndices().get(indexName);
int numCurrent = 0;
int numBWC = 0;
for (IndexShardSegments indexShardSegments : segments) {
for (ShardSegments shardSegments : indexShardSegments) {
for (Segment segment : shardSegments) {
if (indexCreated.luceneVersion.equals(segment.version)) {
numBWC++;
if (Version.CURRENT.luceneVersion.equals(segment.version)) {
numCurrent++;
}
} else if (Version.CURRENT.luceneVersion.equals(segment.version)) {
numCurrent++;
} else {
fail("unexpected version " + segment.version);
}
}
}
}
assertNotEquals("expected at least 1 current segment after translog recovery", 0, numCurrent);
assertNotEquals("expected at least 1 old segment", 0, numBWC);
SearchResponse test = client().prepareSearch(indexName).get();
assertThat(test.getHits().getTotalHits(), greaterThanOrEqualTo(1L));
}
void assertBasicSearchWorks(String indexName) {
logger.info("--> testing basic search");
SearchRequestBuilder searchReq = client().prepareSearch(indexName).setQuery(QueryBuilders.matchAllQuery());


@@ -0,0 +1,98 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
import org.elasticsearch.gradle.test.RestIntegTestTask
import org.elasticsearch.gradle.Version
apply plugin: 'elasticsearch.standalone-test'
// This is a top level task which we will add dependencies to below.
// It is a single task that can be used to run backcompat tests against all versions.
task bwcTest {
description = 'Runs backwards compatibility tests.'
group = 'verification'
}
for (Version version : indexCompatVersions) {
String baseName = "v${version}"
Task oldClusterTest = tasks.create(name: "${baseName}#oldClusterTest", type: RestIntegTestTask) {
mustRunAfter(precommit)
}
tasks.getByName("${baseName}#oldClusterTestRunner").configure {
systemProperty 'tests.is_old_cluster', 'true'
systemProperty 'tests.old_cluster_version', version.toString().minus("-SNAPSHOT")
systemProperty 'tests.path.repo', new File(buildDir, "cluster/shared/repo")
}
Object extension = extensions.findByName("${baseName}#oldClusterTestCluster")
configure(extensions.findByName("${baseName}#oldClusterTestCluster")) {
distribution = 'zip'
bwcVersion = version
numBwcNodes = 2
numNodes = 2
clusterName = 'full-cluster-restart'
if (version.onOrAfter('5.3.0')) {
setting 'http.content_type.required', 'true'
}
}
Task upgradedClusterTest = tasks.create(name: "${baseName}#upgradedClusterTest", type: RestIntegTestTask) {
dependsOn(oldClusterTest, "${baseName}#oldClusterTestCluster#node0.stop")
}
configure(extensions.findByName("${baseName}#upgradedClusterTestCluster")) {
dependsOn oldClusterTest,
"${baseName}#oldClusterTestCluster#node0.stop",
"${baseName}#oldClusterTestCluster#node1.stop"
distribution = 'zip'
clusterName = 'full-cluster-restart'
numNodes = 2
dataDir = { nodeNum -> oldClusterTest.nodes[nodeNum].dataDir }
}
tasks.getByName("${baseName}#upgradedClusterTestRunner").configure {
systemProperty 'tests.is_old_cluster', 'false'
systemProperty 'tests.old_cluster_version', version.toString().minus("-SNAPSHOT")
systemProperty 'tests.path.repo', new File(buildDir, "cluster/shared/repo")
}
Task versionBwcTest = tasks.create(name: "${baseName}#bwcTest") {
dependsOn = [upgradedClusterTest]
}
/* Delay this change because the task we need to modify isn't created until
* after projects are evaluated. */
gradle.projectsEvaluated {
// Disable cleaning the repository so we can test loading a snapshot
tasks.getByName("${baseName}#upgradedClusterTestCluster#prepareCluster.cleanShared").enabled = false
}
bwcTest.dependsOn(versionBwcTest)
}
test.enabled = false // no unit tests for full cluster restarts, only the rest integration test
// basic integ tests includes testing bwc against the most recent version
task integTest {
dependsOn = ["v${indexCompatVersions[-1]}#bwcTest"]
}
check.dependsOn(integTest)


@@ -0,0 +1,265 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.upgrades;
import org.apache.http.ParseException;
import org.apache.http.entity.ContentType;
import org.apache.http.entity.StringEntity;
import org.apache.http.util.EntityUtils;
import org.elasticsearch.Version;
import org.elasticsearch.common.Booleans;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.json.JsonXContent;
import org.elasticsearch.test.rest.ESRestTestCase;
import java.io.IOException;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import static java.util.Collections.emptyMap;
import static java.util.Collections.singletonMap;
import static org.hamcrest.Matchers.containsString;
/**
* Tests to run before and after a full cluster restart. This is run twice,
* once with {@code tests.is_old_cluster} set to {@code true} against a cluster
* of an older version. The cluster is shutdown and a cluster of the new
* version is started with the same data directories and then this is rerun
* with {@code tests.is_old_cluster} set to {@code false}.
*/
public class FullClusterRestartIT extends ESRestTestCase {
private static final String REPO = "/_snapshot/repo";
private final boolean runningAgainstOldCluster = Booleans.parseBoolean(System.getProperty("tests.is_old_cluster"));
private final Version oldClusterVersion = Version.fromString(System.getProperty("tests.old_cluster_version"));
private final boolean supportsLenientBooleans = oldClusterVersion.onOrAfter(Version.V_6_0_0_alpha1_UNRELEASED);
@Override
protected boolean preserveIndicesUponCompletion() {
return true;
}
@Override
protected boolean preserveReposUponCompletion() {
return true;
}
/**
* Tests that a single document survives. Super basic smoke test.
*/
public void testSingleDoc() throws IOException {
String docLocation = "/" + getTestName().toLowerCase(Locale.ROOT) + "/doc/1";
String doc = "{\"test\": \"test\"}";
if (runningAgainstOldCluster) {
client().performRequest("PUT", docLocation, singletonMap("refresh", "true"),
new StringEntity(doc, ContentType.APPLICATION_JSON));
}
assertThat(EntityUtils.toString(client().performRequest("GET", docLocation).getEntity()), containsString(doc));
}
public void testRandomDocumentsAndSnapshot() throws IOException {
String testName = getTestName().toLowerCase(Locale.ROOT);
String index = testName + "_data";
String infoDocument = "/" + testName + "_info/doc/info";
int count;
boolean shouldHaveTranslog;
if (runningAgainstOldCluster) {
count = between(200, 300);
/* We've had bugs in the past where we couldn't restore
* an index without a translog so we randomize whether
* or not we have one. */
shouldHaveTranslog = randomBoolean();
logger.info("Creating {} documents", count);
indexRandomDocuments(index, count, true);
createSnapshot();
// Explicitly flush so we're sure to have a bunch of documents in the Lucene index
client().performRequest("POST", "/_flush");
if (shouldHaveTranslog) {
// Update a few documents so we are sure to have a translog
indexRandomDocuments(index, count / 10, false /* Flushing here would invalidate the whole thing....*/);
}
// Record how many documents we built so we can compare later
XContentBuilder infoDoc = JsonXContent.contentBuilder().startObject();
infoDoc.field("count", count);
infoDoc.field("should_have_translog", shouldHaveTranslog);
infoDoc.endObject();
client().performRequest("PUT", infoDocument, singletonMap("refresh", "true"),
new StringEntity(infoDoc.string(), ContentType.APPLICATION_JSON));
} else {
// Load the number of documents that were written to the old cluster
String doc = EntityUtils.toString(
client().performRequest("GET", infoDocument, singletonMap("filter_path", "_source")).getEntity());
Matcher m = Pattern.compile("\"count\":(\\d+)").matcher(doc);
assertTrue(doc, m.find());
count = Integer.parseInt(m.group(1));
m = Pattern.compile("\"should_have_translog\":(true|false)").matcher(doc);
assertTrue(doc, m.find());
shouldHaveTranslog = Booleans.parseBoolean(m.group(1));
}
// Count the documents in the index to make sure we have as many as we put there
String countResponse = EntityUtils.toString(
client().performRequest("GET", "/" + index + "/_search", singletonMap("size", "0")).getEntity());
assertThat(countResponse, containsString("\"total\":" + count));
if (false == runningAgainstOldCluster) {
assertTranslogRecoveryStatistics(index, shouldHaveTranslog);
}
restoreSnapshot(index, count);
// TODO finish adding tests for the things in OldIndexBackwardsCompatibilityIT
}
// TODO tests for upgrades after shrink. We've had trouble with shrink in the past.
private void indexRandomDocuments(String index, int count, boolean flushAllowed) throws IOException {
for (int i = 0; i < count; i++) {
XContentBuilder doc = JsonXContent.contentBuilder().startObject(); {
doc.field("string", randomAlphaOfLength(10));
doc.field("int", randomInt(100));
doc.field("float", randomFloat());
// be sure to create a "proper" boolean (True, False) for the first document so that automapping is correct
doc.field("bool", i > 0 && supportsLenientBooleans ? randomLenientBoolean() : randomBoolean());
doc.field("field.with.dots", randomAlphaOfLength(10));
// TODO a binary field
}
doc.endObject();
client().performRequest("POST", "/" + index + "/doc/" + i, emptyMap(),
new StringEntity(doc.string(), ContentType.APPLICATION_JSON));
if (rarely()) {
client().performRequest("POST", "/_refresh");
}
if (flushAllowed && rarely()) {
client().performRequest("POST", "/_flush");
}
}
}
private void createSnapshot() throws IOException {
XContentBuilder repoConfig = JsonXContent.contentBuilder().startObject(); {
repoConfig.field("type", "fs");
repoConfig.startObject("settings"); {
repoConfig.field("compress", randomBoolean());
repoConfig.field("location", System.getProperty("tests.path.repo"));
}
repoConfig.endObject();
}
repoConfig.endObject();
client().performRequest("PUT", REPO, emptyMap(), new StringEntity(repoConfig.string(), ContentType.APPLICATION_JSON));
client().performRequest("PUT", REPO + "/snap", singletonMap("wait_for_completion", "true"));
}
private void assertTranslogRecoveryStatistics(String index, boolean shouldHaveTranslog) throws ParseException, IOException {
boolean restoredFromTranslog = false;
boolean foundPrimary = false;
Map<String, String> params = new HashMap<>();
params.put("h", "index,shard,type,stage,translog_ops_recovered");
params.put("s", "index,shard,type");
String recoveryResponse = EntityUtils.toString(client().performRequest("GET", "/_cat/recovery/" + index, params).getEntity());
for (String line : recoveryResponse.split("\n")) {
// Find the primaries
foundPrimary = true;
if (false == line.contains("done") && line.contains("existing_store")) {
continue;
}
/* Mark if we see a primary that looked like it restored from the translog.
* Not all primaries will look like this all the time because we modify
* random documents when we want there to be a translog and they might
* not be spread around all the shards. */
Matcher m = Pattern.compile("(\\d+)$").matcher(line);
assertTrue(line, m.find());
int translogOps = Integer.parseInt(m.group(1));
if (translogOps > 0) {
restoredFromTranslog = true;
}
}
assertTrue("expected to find a primary but didn't\n" + recoveryResponse, foundPrimary);
assertEquals("mismatch while checking for translog recovery\n" + recoveryResponse, shouldHaveTranslog, restoredFromTranslog);
String currentLuceneVersion = Version.CURRENT.luceneVersion.toString();
String bwcLuceneVersion = oldClusterVersion.luceneVersion.toString();
if (shouldHaveTranslog && false == currentLuceneVersion.equals(bwcLuceneVersion)) {
int numCurrentVersion = 0;
int numBwcVersion = 0;
params.clear();
params.put("h", "prirep,shard,index,version");
params.put("s", "prirep,shard,index");
String segmentsResponse = EntityUtils.toString(
client().performRequest("GET", "/_cat/segments/" + index, params).getEntity());
for (String line : segmentsResponse.split("\n")) {
if (false == line.startsWith("p")) {
continue;
}
Matcher m = Pattern.compile("(\\d+\\.\\d+\\.\\d+)$").matcher(line);
assertTrue(line, m.find());
String version = m.group(1);
if (currentLuceneVersion.equals(version)) {
numCurrentVersion++;
} else if (bwcLuceneVersion.equals(version)) {
numBwcVersion++;
} else {
fail("expected version to be one of [" + currentLuceneVersion + "," + bwcLuceneVersion + "] but was" + line);
}
}
assertNotEquals("expected at least 1 current segment after translog recovery", 0, numCurrentVersion);
assertNotEquals("expected at least 1 old segment", 0, numBwcVersion);
}
}
private void restoreSnapshot(String index, int count) throws ParseException, IOException {
if (false == runningAgainstOldCluster) {
/* Remove any "restored" indices from the old cluster run of this test.
* We intentionally don't remove them while running this against the
* old cluster so we can test starting the node with a restored index
* in the cluster. */
client().performRequest("DELETE", "/restored_*");
}
if (runningAgainstOldCluster) {
// TODO restoring the snapshot seems to fail! This seems like a bug.
XContentBuilder restoreCommand = JsonXContent.contentBuilder().startObject();
restoreCommand.field("include_global_state", false);
restoreCommand.field("indices", index);
restoreCommand.field("rename_pattern", index);
restoreCommand.field("rename_replacement", "restored_" + index);
restoreCommand.endObject();
client().performRequest("POST", REPO + "/snap/_restore", singletonMap("wait_for_completion", "true"),
new StringEntity(restoreCommand.string(), ContentType.APPLICATION_JSON));
String countResponse = EntityUtils.toString(
client().performRequest("GET", "/restored_" + index + "/_search", singletonMap("size", "0")).getEntity());
assertThat(countResponse, containsString("\"total\":" + count));
}
}
private Object randomLenientBoolean() {
return randomFrom(new Object[] {"off", "no", "0", 0, "false", false, "on", "yes", "1", 1, "true", true});
}
}


@@ -61,7 +61,9 @@ for (Version version : wireCompatVersions) {
distribution = 'zip'
clusterName = 'rolling-upgrade'
unicastTransportUri = { seedNode, node, ant -> oldClusterTest.nodes.get(0).transportUri() }
dataDir = "${-> oldClusterTest.nodes[1].dataDir}"
/* Override the data directory so the new node always gets the node we
* just stopped's data directory. */
dataDir = { nodeNumber -> oldClusterTest.nodes[1].dataDir }
setting 'repositories.url.allowed_urls', 'http://snapshot.test*'
}
@@ -79,7 +81,9 @@ for (Version version : wireCompatVersions) {
distribution = 'zip'
clusterName = 'rolling-upgrade'
unicastTransportUri = { seedNode, node, ant -> mixedClusterTest.nodes.get(0).transportUri() }
dataDir = "${-> oldClusterTest.nodes[0].dataDir}"
/* Override the data directory so the new node always gets the node we
* just stopped's data directory. */
dataDir = { nodeNumber -> oldClusterTest.nodes[0].dataDir}
setting 'repositories.url.allowed_urls', 'http://snapshot.test*'
}


@@ -62,6 +62,7 @@ List projects = [
'plugins:store-smb',
'qa:auto-create-index',
'qa:evil-tests',
'qa:full-cluster-restart',
'qa:mixed-cluster',
'qa:multi-cluster-search',
'qa:no-bootstrap-tests',