Merge pull request #16745 from s1monw/probe_write_access

Assert that we can write to all data paths on startup

Today we might start a node and some of the paths might not have the
required permissions. This commit goes through all data directories as
well as index, shard and state directories and ensures we have write access.
To make this work across all operating systems, we try to write a real file
and then remove it again in each of those directories.
This commit is contained in:
Simon Willnauer 2016-02-22 10:38:49 -08:00
commit 35ae8263af
4 changed files with 252 additions and 66 deletions

View File

@ -40,9 +40,11 @@ import org.elasticsearch.common.settings.Setting.Scope;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.unit.ByteSizeValue;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.gateway.MetaDataStateFormat;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.shard.ShardId;
import org.elasticsearch.index.shard.ShardPath;
import org.elasticsearch.index.store.FsDirectoryService;
import org.elasticsearch.monitor.fs.FsInfo;
import org.elasticsearch.monitor.fs.FsProbe;
@ -76,7 +78,7 @@ import static java.util.Collections.unmodifiableSet;
/**
* A component that holds all data paths for a single node.
*/
public class NodeEnvironment extends AbstractComponent implements Closeable {
public final class NodeEnvironment extends AbstractComponent implements Closeable {
public static class NodePath {
/* ${data.paths}/nodes/{node.id} */
public final Path path;
@ -167,63 +169,71 @@ public class NodeEnvironment extends AbstractComponent implements Closeable {
localNodeId = -1;
return;
}
final NodePath[] nodePaths = new NodePath[environment.dataWithClusterFiles().length];
final Lock[] locks = new Lock[nodePaths.length];
sharedDataPath = environment.sharedDataFile();
boolean success = false;
int localNodeId = -1;
IOException lastException = null;
int maxLocalStorageNodes = MAX_LOCAL_STORAGE_NODES_SETTING.get(settings);
for (int possibleLockId = 0; possibleLockId < maxLocalStorageNodes; possibleLockId++) {
for (int dirIndex = 0; dirIndex < environment.dataWithClusterFiles().length; dirIndex++) {
Path dir = environment.dataWithClusterFiles()[dirIndex].resolve(NODES_FOLDER).resolve(Integer.toString(possibleLockId));
Files.createDirectories(dir);
try {
sharedDataPath = environment.sharedDataFile();
int localNodeId = -1;
IOException lastException = null;
int maxLocalStorageNodes = MAX_LOCAL_STORAGE_NODES_SETTING.get(settings);
for (int possibleLockId = 0; possibleLockId < maxLocalStorageNodes; possibleLockId++) {
for (int dirIndex = 0; dirIndex < environment.dataWithClusterFiles().length; dirIndex++) {
Path dir = environment.dataWithClusterFiles()[dirIndex].resolve(NODES_FOLDER).resolve(Integer.toString(possibleLockId));
Files.createDirectories(dir);
try (Directory luceneDir = FSDirectory.open(dir, NativeFSLockFactory.INSTANCE)) {
logger.trace("obtaining node lock on {} ...", dir.toAbsolutePath());
try {
locks[dirIndex] = luceneDir.obtainLock(NODE_LOCK_FILENAME);
nodePaths[dirIndex] = new NodePath(dir, environment);
localNodeId = possibleLockId;
} catch (LockObtainFailedException ex) {
logger.trace("failed to obtain node lock on {}", dir.toAbsolutePath());
try (Directory luceneDir = FSDirectory.open(dir, NativeFSLockFactory.INSTANCE)) {
logger.trace("obtaining node lock on {} ...", dir.toAbsolutePath());
try {
locks[dirIndex] = luceneDir.obtainLock(NODE_LOCK_FILENAME);
nodePaths[dirIndex] = new NodePath(dir, environment);
localNodeId = possibleLockId;
} catch (LockObtainFailedException ex) {
logger.trace("failed to obtain node lock on {}", dir.toAbsolutePath());
// release all the ones that were obtained up until now
releaseAndNullLocks(locks);
break;
}
} catch (IOException e) {
logger.trace("failed to obtain node lock on {}", e, dir.toAbsolutePath());
lastException = new IOException("failed to obtain lock on " + dir.toAbsolutePath(), e);
// release all the ones that were obtained up until now
releaseAndNullLocks(locks);
break;
}
} catch (IOException e) {
logger.trace("failed to obtain node lock on {}", e, dir.toAbsolutePath());
lastException = new IOException("failed to obtain lock on " + dir.toAbsolutePath(), e);
// release all the ones that were obtained up until now
releaseAndNullLocks(locks);
}
if (locks[0] != null) {
// we found a lock, break
break;
}
}
if (locks[0] != null) {
// we found a lock, break
break;
if (locks[0] == null) {
throw new IllegalStateException("Failed to obtain node lock, is the following location writable?: "
+ Arrays.toString(environment.dataWithClusterFiles()), lastException);
}
this.localNodeId = localNodeId;
this.locks = locks;
this.nodePaths = nodePaths;
if (logger.isDebugEnabled()) {
logger.debug("using node location [{}], local_node_id [{}]", nodePaths, localNodeId);
}
maybeLogPathDetails();
maybeLogHeapDetails();
applySegmentInfosTrace(settings);
assertCanWrite();
success = true;
} finally {
if (success == false) {
IOUtils.closeWhileHandlingException(locks);
}
}
if (locks[0] == null) {
throw new IllegalStateException("Failed to obtain node lock, is the following location writable?: "
+ Arrays.toString(environment.dataWithClusterFiles()), lastException);
}
this.localNodeId = localNodeId;
this.locks = locks;
this.nodePaths = nodePaths;
if (logger.isDebugEnabled()) {
logger.debug("using node location [{}], local_node_id [{}]", nodePaths, localNodeId);
}
maybeLogPathDetails();
maybeLogHeapDetails();
applySegmentInfosTrace(settings);
}
private static void releaseAndNullLocks(Lock[] locks) {
@ -793,7 +803,7 @@ public class NodeEnvironment extends AbstractComponent implements Closeable {
}
@Override
public void close() {
public final void close() {
if (closed.compareAndSet(false, true) && locks != null) {
for (Lock lock : locks) {
try {
@ -909,4 +919,45 @@ public class NodeEnvironment extends AbstractComponent implements Closeable {
return shardPath.getParent().getParent().getParent();
}
/**
 * Best-effort startup check that every directory this node writes to is actually
 * writable. Catches misconfiguration (e.g. the node started under the wrong
 * username) before it can cause damage later on.
 */
private void assertCanWrite() throws IOException {
    // node-level data paths first
    for (Path nodePath : nodeDataPaths()) {
        tryWriteTempFile(nodePath);
    }
    for (String indexName : this.findAllIndices()) {
        // per-index: the index state directory, then the index directory itself
        for (Path indexPath : this.indexPaths(indexName)) {
            tryWriteTempFile(indexPath.resolve(MetaDataStateFormat.STATE_DIR_NAME));
            tryWriteTempFile(indexPath);
        }
        // per-shard: index, translog and state sub-directories, then the shard root
        for (ShardId shardId : this.findAllShardIds(new Index(indexName, IndexMetaData.INDEX_UUID_NA_VALUE))) {
            for (Path shardPath : this.availableShardPaths(shardId)) {
                tryWriteTempFile(shardPath.resolve(ShardPath.INDEX_FOLDER_NAME));
                tryWriteTempFile(shardPath.resolve(ShardPath.TRANSLOG_FOLDER_NAME));
                tryWriteTempFile(shardPath.resolve(MetaDataStateFormat.STATE_DIR_NAME));
                tryWriteTempFile(shardPath);
            }
        }
    }
}
/**
 * Probes {@code path} for write access by creating (and immediately removing) a
 * marker file. Does nothing if the directory does not exist yet.
 *
 * @param path the directory to probe
 * @throws IOException if the marker file cannot be created or removed, which
 *         almost always means the process lacks write permission on {@code path}
 */
private static void tryWriteTempFile(Path path) throws IOException {
    if (Files.exists(path)) {
        Path resolve = path.resolve(".es_temp_file");
        try {
            // Remove a stale probe file first: a leftover from a run that crashed
            // between create and delete would make createFile throw
            // FileAlreadyExistsException and wrongly report a permission problem.
            Files.deleteIfExists(resolve);
            Files.createFile(resolve);
            Files.deleteIfExists(resolve);
        } catch (IOException ex) {
            throw new IOException("failed to write in data directory [" + path + "] write permission is required", ex);
        }
    }
}
}

View File

@ -0,0 +1,101 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.env;
import org.elasticsearch.common.io.PathUtils;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.test.PosixPermissionsResetter;
import org.junit.BeforeClass;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.attribute.PosixFileAttributeView;
import java.nio.file.attribute.PosixFilePermission;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
/**
 * Tests that {@link NodeEnvironment} fails fast at startup when a node, index or
 * shard directory is not writable (POSIX filesystems only).
 */
public class NodeEnvironmentEvilTests extends ESTestCase {

    private static boolean isPosix;

    @BeforeClass
    public static void checkPosix() throws IOException {
        // the permission manipulation below is only meaningful on POSIX filesystems
        isPosix = Files.getFileAttributeView(createTempFile(), PosixFileAttributeView.class) != null;
    }

    /** Returns a read-only permission set: owner/group/others may read, nobody may write. */
    private static HashSet<PosixFilePermission> readOnlyPermissions() {
        return new HashSet<>(Arrays.asList(
                PosixFilePermission.OTHERS_READ,
                PosixFilePermission.GROUP_READ,
                PosixFilePermission.OWNER_READ));
    }

    /** Builds node settings with a fresh temp home directory and the given data paths. */
    private static Settings buildSettings(String[] tempPaths) {
        return Settings.builder()
                .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toAbsolutePath().toString())
                .putArray(Environment.PATH_DATA_SETTING.getKey(), tempPaths).build();
    }

    public void testMissingWritePermission() throws IOException {
        assumeTrue("posix filesystem", isPosix);
        final String[] tempPaths = tmpPaths();
        Path path = PathUtils.get(randomFrom(tempPaths));
        try (PosixPermissionsResetter attr = new PosixPermissionsResetter(path)) {
            attr.setPermissions(readOnlyPermissions());
            Settings build = buildSettings(tempPaths);
            IOException ioException = expectThrows(IOException.class, () -> {
                new NodeEnvironment(build, new Environment(build));
            });
            // a read-only data path fails while obtaining the node lock; the message starts with the path
            assertTrue(ioException.getMessage(), ioException.getMessage().startsWith(path.toString()));
        }
    }

    public void testMissingWritePermissionOnIndex() throws IOException {
        assumeTrue("posix filesystem", isPosix);
        final String[] tempPaths = tmpPaths();
        Path path = PathUtils.get(randomFrom(tempPaths));
        Path fooIndex = path.resolve("elasticsearch").resolve("nodes").resolve("0")
                .resolve(NodeEnvironment.INDICES_FOLDER).resolve("foo");
        Files.createDirectories(fooIndex);
        try (PosixPermissionsResetter attr = new PosixPermissionsResetter(fooIndex)) {
            attr.setPermissions(readOnlyPermissions());
            Settings build = buildSettings(tempPaths);
            IOException ioException = expectThrows(IOException.class, () -> {
                new NodeEnvironment(build, new Environment(build));
            });
            // a read-only index directory is caught by the write-access probe
            assertTrue(ioException.getMessage(), ioException.getMessage().startsWith("failed to write in data directory"));
        }
    }

    public void testMissingWritePermissionOnShard() throws IOException {
        assumeTrue("posix filesystem", isPosix);
        final String[] tempPaths = tmpPaths();
        Path path = PathUtils.get(randomFrom(tempPaths));
        Path fooIndex = path.resolve("elasticsearch").resolve("nodes").resolve("0")
                .resolve(NodeEnvironment.INDICES_FOLDER).resolve("foo");
        Path fooShard = fooIndex.resolve("0");
        Path fooShardIndex = fooShard.resolve("index");
        Path fooShardTranslog = fooShard.resolve("translog");
        Path fooShardState = fooShard.resolve("_state");
        // any one of the shard-level directories being read-only must fail startup
        Path pick = randomFrom(fooShard, fooShardIndex, fooShardTranslog, fooShardState);
        Files.createDirectories(pick);
        try (PosixPermissionsResetter attr = new PosixPermissionsResetter(pick)) {
            attr.setPermissions(readOnlyPermissions());
            Settings build = buildSettings(tempPaths);
            IOException ioException = expectThrows(IOException.class, () -> {
                new NodeEnvironment(build, new Environment(build));
            });
            assertTrue(ioException.getMessage(), ioException.getMessage().startsWith("failed to write in data directory"));
        }
    }
}

View File

@ -51,6 +51,7 @@ import org.elasticsearch.common.cli.UserError;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.test.PosixPermissionsResetter;
import org.junit.BeforeClass;
@LuceneTestCase.SuppressFileSystems("*")
@ -63,24 +64,6 @@ public class InstallPluginCommandTests extends ESTestCase {
isPosix = Files.getFileAttributeView(createTempFile(), PosixFileAttributeView.class) != null;
}
/** Stores the posix attributes for a path and resets them on close. */
static class PosixPermissionsResetter implements AutoCloseable {
private final PosixFileAttributeView attributeView;
final Set<PosixFilePermission> permissions;
public PosixPermissionsResetter(Path path) throws IOException {
attributeView = Files.getFileAttributeView(path, PosixFileAttributeView.class);
assertNotNull(attributeView);
permissions = attributeView.readAttributes().permissions();
}
@Override
public void close() throws IOException {
attributeView.setPermissions(permissions);
}
public void setPermissions(Set<PosixFilePermission> newPermissions) throws IOException {
attributeView.setPermissions(newPermissions);
}
}
/** Creates a test environment with bin, config and plugins directories. */
static Environment createEnv() throws IOException {
Path home = createTempDir();
@ -103,7 +86,7 @@ public class InstallPluginCommandTests extends ESTestCase {
}
}
}
static String writeZip(Path structure, String prefix) throws IOException {
Path zip = createTempDir().resolve(structure.getFileName() + ".zip");
try (ZipOutputStream stream = new ZipOutputStream(Files.newOutputStream(zip))) {
@ -382,7 +365,7 @@ public class InstallPluginCommandTests extends ESTestCase {
Files.createFile(binDir.resolve("somescript"));
String pluginZip = createPlugin("fake", pluginDir);
try (PosixPermissionsResetter binAttrs = new PosixPermissionsResetter(env.binFile())) {
Set<PosixFilePermission> perms = new HashSet<>(binAttrs.permissions);
Set<PosixFilePermission> perms = binAttrs.getCopyPermissions();
// make sure at least one execute perm is missing, so we know we forced it during installation
perms.remove(PosixFilePermission.GROUP_EXECUTE);
binAttrs.setPermissions(perms);

View File

@ -0,0 +1,51 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.test;
import org.junit.Assert;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.attribute.PosixFileAttributeView;
import java.nio.file.attribute.PosixFilePermission;
import java.util.HashSet;
import java.util.Set;
/**
 * Snapshots the POSIX permissions of a path on construction and restores them on
 * {@link #close()}, so a test can temporarily alter permissions inside a
 * try-with-resources block without leaking the change past its own scope.
 */
public class PosixPermissionsResetter implements AutoCloseable {

    private final PosixFileAttributeView view;
    private final Set<PosixFilePermission> original;

    public PosixPermissionsResetter(Path path) throws IOException {
        view = Files.getFileAttributeView(path, PosixFileAttributeView.class);
        Assert.assertNotNull(view);
        original = view.readAttributes().permissions();
    }

    /** Applies the given permissions to the path. */
    public void setPermissions(Set<PosixFilePermission> newPermissions) throws IOException {
        view.setPermissions(newPermissions);
    }

    /** Returns a mutable copy of the permissions captured at construction time. */
    public Set<PosixFilePermission> getCopyPermissions() {
        return new HashSet<>(original);
    }

    @Override
    public void close() throws IOException {
        // restore whatever the path had before the test touched it
        view.setPermissions(original);
    }
}