Merge pull request #16745 from s1monw/probe_write_access

Assert that we can write in all data-path on startup

Today we might start a node and some of the paths might not have the
required permissions. This commit goes through all data directories as
well as index, shard and state directories and ensures we have write access.
To make this work across all OS etc. we are trying to write a real file
and remove it again in each of those directories
This commit is contained in:
Simon Willnauer 2016-02-22 10:38:49 -08:00
commit 35ae8263af
4 changed files with 252 additions and 66 deletions

View File

@ -40,9 +40,11 @@ import org.elasticsearch.common.settings.Setting.Scope;
import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.unit.ByteSizeValue; import org.elasticsearch.common.unit.ByteSizeValue;
import org.elasticsearch.common.unit.TimeValue; import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.gateway.MetaDataStateFormat;
import org.elasticsearch.index.Index; import org.elasticsearch.index.Index;
import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.shard.ShardId; import org.elasticsearch.index.shard.ShardId;
import org.elasticsearch.index.shard.ShardPath;
import org.elasticsearch.index.store.FsDirectoryService; import org.elasticsearch.index.store.FsDirectoryService;
import org.elasticsearch.monitor.fs.FsInfo; import org.elasticsearch.monitor.fs.FsInfo;
import org.elasticsearch.monitor.fs.FsProbe; import org.elasticsearch.monitor.fs.FsProbe;
@ -76,7 +78,7 @@ import static java.util.Collections.unmodifiableSet;
/** /**
* A component that holds all data paths for a single node. * A component that holds all data paths for a single node.
*/ */
public class NodeEnvironment extends AbstractComponent implements Closeable { public final class NodeEnvironment extends AbstractComponent implements Closeable {
public static class NodePath { public static class NodePath {
/* ${data.paths}/nodes/{node.id} */ /* ${data.paths}/nodes/{node.id} */
public final Path path; public final Path path;
@ -167,63 +169,71 @@ public class NodeEnvironment extends AbstractComponent implements Closeable {
localNodeId = -1; localNodeId = -1;
return; return;
} }
final NodePath[] nodePaths = new NodePath[environment.dataWithClusterFiles().length]; final NodePath[] nodePaths = new NodePath[environment.dataWithClusterFiles().length];
final Lock[] locks = new Lock[nodePaths.length]; final Lock[] locks = new Lock[nodePaths.length];
sharedDataPath = environment.sharedDataFile(); boolean success = false;
int localNodeId = -1; try {
IOException lastException = null; sharedDataPath = environment.sharedDataFile();
int maxLocalStorageNodes = MAX_LOCAL_STORAGE_NODES_SETTING.get(settings); int localNodeId = -1;
for (int possibleLockId = 0; possibleLockId < maxLocalStorageNodes; possibleLockId++) { IOException lastException = null;
for (int dirIndex = 0; dirIndex < environment.dataWithClusterFiles().length; dirIndex++) { int maxLocalStorageNodes = MAX_LOCAL_STORAGE_NODES_SETTING.get(settings);
Path dir = environment.dataWithClusterFiles()[dirIndex].resolve(NODES_FOLDER).resolve(Integer.toString(possibleLockId)); for (int possibleLockId = 0; possibleLockId < maxLocalStorageNodes; possibleLockId++) {
Files.createDirectories(dir); for (int dirIndex = 0; dirIndex < environment.dataWithClusterFiles().length; dirIndex++) {
Path dir = environment.dataWithClusterFiles()[dirIndex].resolve(NODES_FOLDER).resolve(Integer.toString(possibleLockId));
Files.createDirectories(dir);
try (Directory luceneDir = FSDirectory.open(dir, NativeFSLockFactory.INSTANCE)) { try (Directory luceneDir = FSDirectory.open(dir, NativeFSLockFactory.INSTANCE)) {
logger.trace("obtaining node lock on {} ...", dir.toAbsolutePath()); logger.trace("obtaining node lock on {} ...", dir.toAbsolutePath());
try { try {
locks[dirIndex] = luceneDir.obtainLock(NODE_LOCK_FILENAME); locks[dirIndex] = luceneDir.obtainLock(NODE_LOCK_FILENAME);
nodePaths[dirIndex] = new NodePath(dir, environment); nodePaths[dirIndex] = new NodePath(dir, environment);
localNodeId = possibleLockId; localNodeId = possibleLockId;
} catch (LockObtainFailedException ex) { } catch (LockObtainFailedException ex) {
logger.trace("failed to obtain node lock on {}", dir.toAbsolutePath()); logger.trace("failed to obtain node lock on {}", dir.toAbsolutePath());
// release all the ones that were obtained up until now
releaseAndNullLocks(locks);
break;
}
} catch (IOException e) {
logger.trace("failed to obtain node lock on {}", e, dir.toAbsolutePath());
lastException = new IOException("failed to obtain lock on " + dir.toAbsolutePath(), e);
// release all the ones that were obtained up until now // release all the ones that were obtained up until now
releaseAndNullLocks(locks); releaseAndNullLocks(locks);
break; break;
} }
}
} catch (IOException e) { if (locks[0] != null) {
logger.trace("failed to obtain node lock on {}", e, dir.toAbsolutePath()); // we found a lock, break
lastException = new IOException("failed to obtain lock on " + dir.toAbsolutePath(), e);
// release all the ones that were obtained up until now
releaseAndNullLocks(locks);
break; break;
} }
} }
if (locks[0] != null) {
// we found a lock, break if (locks[0] == null) {
break; throw new IllegalStateException("Failed to obtain node lock, is the following location writable?: "
+ Arrays.toString(environment.dataWithClusterFiles()), lastException);
}
this.localNodeId = localNodeId;
this.locks = locks;
this.nodePaths = nodePaths;
if (logger.isDebugEnabled()) {
logger.debug("using node location [{}], local_node_id [{}]", nodePaths, localNodeId);
}
maybeLogPathDetails();
maybeLogHeapDetails();
applySegmentInfosTrace(settings);
assertCanWrite();
success = true;
} finally {
if (success == false) {
IOUtils.closeWhileHandlingException(locks);
} }
} }
if (locks[0] == null) {
throw new IllegalStateException("Failed to obtain node lock, is the following location writable?: "
+ Arrays.toString(environment.dataWithClusterFiles()), lastException);
}
this.localNodeId = localNodeId;
this.locks = locks;
this.nodePaths = nodePaths;
if (logger.isDebugEnabled()) {
logger.debug("using node location [{}], local_node_id [{}]", nodePaths, localNodeId);
}
maybeLogPathDetails();
maybeLogHeapDetails();
applySegmentInfosTrace(settings);
} }
private static void releaseAndNullLocks(Lock[] locks) { private static void releaseAndNullLocks(Lock[] locks) {
@ -793,7 +803,7 @@ public class NodeEnvironment extends AbstractComponent implements Closeable {
} }
@Override @Override
public void close() { public final void close() {
if (closed.compareAndSet(false, true) && locks != null) { if (closed.compareAndSet(false, true) && locks != null) {
for (Lock lock : locks) { for (Lock lock : locks) {
try { try {
@ -909,4 +919,45 @@ public class NodeEnvironment extends AbstractComponent implements Closeable {
return shardPath.getParent().getParent().getParent(); return shardPath.getParent().getParent().getParent();
} }
/**
* This is a best effort to ensure that we actually have write permissions to write in all our data directories.
* This prevents disasters if nodes are started under the wrong username etc.
*/
private void assertCanWrite() throws IOException {
for (Path path : nodeDataPaths()) { // check node-paths are writable
tryWriteTempFile(path);
}
for (String index : this.findAllIndices()) {
for (Path path : this.indexPaths(index)) { // check index paths are writable
Path statePath = path.resolve(MetaDataStateFormat.STATE_DIR_NAME);
tryWriteTempFile(statePath);
tryWriteTempFile(path);
}
for (ShardId shardID : this.findAllShardIds(new Index(index, IndexMetaData.INDEX_UUID_NA_VALUE))) {
Path[] paths = this.availableShardPaths(shardID);
for (Path path : paths) { // check shard paths are writable
Path indexDir = path.resolve(ShardPath.INDEX_FOLDER_NAME);
Path statePath = path.resolve(MetaDataStateFormat.STATE_DIR_NAME);
Path translogDir = path.resolve(ShardPath.TRANSLOG_FOLDER_NAME);
tryWriteTempFile(indexDir);
tryWriteTempFile(translogDir);
tryWriteTempFile(statePath);
tryWriteTempFile(path);
}
}
}
}
private static void tryWriteTempFile(Path path) throws IOException {
if (Files.exists(path)) {
Path resolve = path.resolve(".es_temp_file");
try {
Files.createFile(resolve);
Files.deleteIfExists(resolve);
} catch (IOException ex) {
throw new IOException("failed to write in data directory [" + path + "] write permission is required", ex);
}
}
}
} }

View File

@ -0,0 +1,101 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.env;
import org.elasticsearch.common.io.PathUtils;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.test.PosixPermissionsResetter;
import org.junit.BeforeClass;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.attribute.PosixFileAttributeView;
import java.nio.file.attribute.PosixFilePermission;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
public class NodeEnvironmentEvilTests extends ESTestCase {
private static boolean isPosix;
@BeforeClass
public static void checkPosix() throws IOException {
isPosix = Files.getFileAttributeView(createTempFile(), PosixFileAttributeView.class) != null;
}
public void testMissingWritePermission() throws IOException {
assumeTrue("posix filesystem", isPosix);
final String[] tempPaths = tmpPaths();
Path path = PathUtils.get(randomFrom(tempPaths));
try (PosixPermissionsResetter attr = new PosixPermissionsResetter(path)) {
attr.setPermissions(new HashSet<>(Arrays.asList(PosixFilePermission.OTHERS_READ, PosixFilePermission.GROUP_READ, PosixFilePermission.OWNER_READ)));
Settings build = Settings.builder()
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toAbsolutePath().toString())
.putArray(Environment.PATH_DATA_SETTING.getKey(), tempPaths).build();
IOException ioException = expectThrows(IOException.class, () -> {
new NodeEnvironment(build, new Environment(build));
});
assertTrue(ioException.getMessage(), ioException.getMessage().startsWith(path.toString()));
}
}
public void testMissingWritePermissionOnIndex() throws IOException {
assumeTrue("posix filesystem", isPosix);
final String[] tempPaths = tmpPaths();
Path path = PathUtils.get(randomFrom(tempPaths));
Path fooIndex = path.resolve("elasticsearch").resolve("nodes").resolve("0").resolve(NodeEnvironment.INDICES_FOLDER).resolve("foo");
Files.createDirectories(fooIndex);
try (PosixPermissionsResetter attr = new PosixPermissionsResetter(fooIndex)) {
attr.setPermissions(new HashSet<>(Arrays.asList(PosixFilePermission.OTHERS_READ, PosixFilePermission.GROUP_READ, PosixFilePermission.OWNER_READ)));
Settings build = Settings.builder()
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toAbsolutePath().toString())
.putArray(Environment.PATH_DATA_SETTING.getKey(), tempPaths).build();
IOException ioException = expectThrows(IOException.class, () -> {
new NodeEnvironment(build, new Environment(build));
});
assertTrue(ioException.getMessage(), ioException.getMessage().startsWith("failed to write in data directory"));
}
}
public void testMissingWritePermissionOnShard() throws IOException {
assumeTrue("posix filesystem", isPosix);
final String[] tempPaths = tmpPaths();
Path path = PathUtils.get(randomFrom(tempPaths));
Path fooIndex = path.resolve("elasticsearch").resolve("nodes").resolve("0").resolve(NodeEnvironment.INDICES_FOLDER).resolve("foo");
Path fooShard = fooIndex.resolve("0");
Path fooShardIndex = fooShard.resolve("index");
Path fooShardTranslog = fooShard.resolve("translog");
Path fooShardState = fooShard.resolve("_state");
Path pick = randomFrom(fooShard, fooShardIndex, fooShardTranslog, fooShardState);
Files.createDirectories(pick);
try (PosixPermissionsResetter attr = new PosixPermissionsResetter(pick)) {
attr.setPermissions(new HashSet<>(Arrays.asList(PosixFilePermission.OTHERS_READ, PosixFilePermission.GROUP_READ, PosixFilePermission.OWNER_READ)));
Settings build = Settings.builder()
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toAbsolutePath().toString())
.putArray(Environment.PATH_DATA_SETTING.getKey(), tempPaths).build();
IOException ioException = expectThrows(IOException.class, () -> {
new NodeEnvironment(build, new Environment(build));
});
assertTrue(ioException.getMessage(), ioException.getMessage().startsWith("failed to write in data directory"));
}
}
}

View File

@ -51,6 +51,7 @@ import org.elasticsearch.common.cli.UserError;
import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment; import org.elasticsearch.env.Environment;
import org.elasticsearch.test.ESTestCase; import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.test.PosixPermissionsResetter;
import org.junit.BeforeClass; import org.junit.BeforeClass;
@LuceneTestCase.SuppressFileSystems("*") @LuceneTestCase.SuppressFileSystems("*")
@ -63,24 +64,6 @@ public class InstallPluginCommandTests extends ESTestCase {
isPosix = Files.getFileAttributeView(createTempFile(), PosixFileAttributeView.class) != null; isPosix = Files.getFileAttributeView(createTempFile(), PosixFileAttributeView.class) != null;
} }
/** Stores the posix attributes for a path and resets them on close. */
static class PosixPermissionsResetter implements AutoCloseable {
private final PosixFileAttributeView attributeView;
final Set<PosixFilePermission> permissions;
public PosixPermissionsResetter(Path path) throws IOException {
attributeView = Files.getFileAttributeView(path, PosixFileAttributeView.class);
assertNotNull(attributeView);
permissions = attributeView.readAttributes().permissions();
}
@Override
public void close() throws IOException {
attributeView.setPermissions(permissions);
}
public void setPermissions(Set<PosixFilePermission> newPermissions) throws IOException {
attributeView.setPermissions(newPermissions);
}
}
/** Creates a test environment with bin, config and plugins directories. */ /** Creates a test environment with bin, config and plugins directories. */
static Environment createEnv() throws IOException { static Environment createEnv() throws IOException {
Path home = createTempDir(); Path home = createTempDir();
@ -103,7 +86,7 @@ public class InstallPluginCommandTests extends ESTestCase {
} }
} }
} }
static String writeZip(Path structure, String prefix) throws IOException { static String writeZip(Path structure, String prefix) throws IOException {
Path zip = createTempDir().resolve(structure.getFileName() + ".zip"); Path zip = createTempDir().resolve(structure.getFileName() + ".zip");
try (ZipOutputStream stream = new ZipOutputStream(Files.newOutputStream(zip))) { try (ZipOutputStream stream = new ZipOutputStream(Files.newOutputStream(zip))) {
@ -382,7 +365,7 @@ public class InstallPluginCommandTests extends ESTestCase {
Files.createFile(binDir.resolve("somescript")); Files.createFile(binDir.resolve("somescript"));
String pluginZip = createPlugin("fake", pluginDir); String pluginZip = createPlugin("fake", pluginDir);
try (PosixPermissionsResetter binAttrs = new PosixPermissionsResetter(env.binFile())) { try (PosixPermissionsResetter binAttrs = new PosixPermissionsResetter(env.binFile())) {
Set<PosixFilePermission> perms = new HashSet<>(binAttrs.permissions); Set<PosixFilePermission> perms = binAttrs.getCopyPermissions();
// make sure at least one execute perm is missing, so we know we forced it during installation // make sure at least one execute perm is missing, so we know we forced it during installation
perms.remove(PosixFilePermission.GROUP_EXECUTE); perms.remove(PosixFilePermission.GROUP_EXECUTE);
binAttrs.setPermissions(perms); binAttrs.setPermissions(perms);

View File

@ -0,0 +1,51 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.test;
import org.junit.Assert;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.attribute.PosixFileAttributeView;
import java.nio.file.attribute.PosixFilePermission;
import java.util.HashSet;
import java.util.Set;
/** Stores the posix attributes for a path and resets them on close. */
public class PosixPermissionsResetter implements AutoCloseable {
private final PosixFileAttributeView attributeView;
private final Set<PosixFilePermission> permissions;
public PosixPermissionsResetter(Path path) throws IOException {
attributeView = Files.getFileAttributeView(path, PosixFileAttributeView.class);
Assert.assertNotNull(attributeView);
permissions = attributeView.readAttributes().permissions();
}
@Override
public void close() throws IOException {
attributeView.setPermissions(permissions);
}
public void setPermissions(Set<PosixFilePermission> newPermissions) throws IOException {
attributeView.setPermissions(newPermissions);
}
public Set<PosixFilePermission> getCopyPermissions() {
return new HashSet<>(permissions);
}
}