mirror of
https://github.com/honeymoose/OpenSearch.git
synced 2025-02-17 10:25:15 +00:00
fsync translog when closing and not deleting, allocate failed shard to another started shard
This commit is contained in:
parent
d9979f8dfe
commit
908fba44e7
@ -29,8 +29,6 @@ import org.elasticsearch.common.settings.Settings;
|
|||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
import static org.elasticsearch.common.settings.ImmutableSettings.Builder.*;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @author kimchy (shay.banon)
|
* @author kimchy (shay.banon)
|
||||||
*/
|
*/
|
||||||
@ -40,11 +38,7 @@ public class NodeEnvironment extends AbstractComponent {
|
|||||||
|
|
||||||
private final Lock lock;
|
private final Lock lock;
|
||||||
|
|
||||||
public NodeEnvironment(File nodeFile) {
|
private final int localNodeId;
|
||||||
super(EMPTY_SETTINGS);
|
|
||||||
this.nodeFile = nodeFile;
|
|
||||||
this.lock = null;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Inject public NodeEnvironment(Settings settings, Environment environment) throws IOException {
|
@Inject public NodeEnvironment(Settings settings, Environment environment) throws IOException {
|
||||||
super(settings);
|
super(settings);
|
||||||
@ -53,11 +47,13 @@ public class NodeEnvironment extends AbstractComponent {
|
|||||||
!settings.getAsBoolean("node.master", true)) {
|
!settings.getAsBoolean("node.master", true)) {
|
||||||
nodeFile = null;
|
nodeFile = null;
|
||||||
lock = null;
|
lock = null;
|
||||||
|
localNodeId = -1;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
Lock lock = null;
|
Lock lock = null;
|
||||||
File dir = null;
|
File dir = null;
|
||||||
|
int localNodeId = -1;
|
||||||
for (int i = 0; i < 100; i++) {
|
for (int i = 0; i < 100; i++) {
|
||||||
dir = new File(new File(environment.workWithClusterFile(), "nodes"), Integer.toString(i));
|
dir = new File(new File(environment.workWithClusterFile(), "nodes"), Integer.toString(i));
|
||||||
if (!dir.exists()) {
|
if (!dir.exists()) {
|
||||||
@ -69,6 +65,7 @@ public class NodeEnvironment extends AbstractComponent {
|
|||||||
boolean obtained = tmpLock.obtain();
|
boolean obtained = tmpLock.obtain();
|
||||||
if (obtained) {
|
if (obtained) {
|
||||||
lock = tmpLock;
|
lock = tmpLock;
|
||||||
|
localNodeId = i;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
@ -78,13 +75,18 @@ public class NodeEnvironment extends AbstractComponent {
|
|||||||
if (lock == null) {
|
if (lock == null) {
|
||||||
throw new IOException("Failed to obtain node lock");
|
throw new IOException("Failed to obtain node lock");
|
||||||
}
|
}
|
||||||
|
this.localNodeId = localNodeId;
|
||||||
this.lock = lock;
|
this.lock = lock;
|
||||||
this.nodeFile = dir;
|
this.nodeFile = dir;
|
||||||
if (logger.isDebugEnabled()) {
|
if (logger.isDebugEnabled()) {
|
||||||
logger.debug("using node location [{}]", dir);
|
logger.debug("using node location [{}], local_node_id [{}]", dir, localNodeId);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public int localNodeId() {
|
||||||
|
return this.localNodeId;
|
||||||
|
}
|
||||||
|
|
||||||
public boolean hasNodeFile() {
|
public boolean hasNodeFile() {
|
||||||
return nodeFile != null && lock != null;
|
return nodeFile != null && lock != null;
|
||||||
}
|
}
|
||||||
|
@ -36,6 +36,8 @@ import java.util.List;
|
|||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
|
import static org.elasticsearch.cluster.routing.ShardRoutingState.*;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @author kimchy (shay.banon)
|
* @author kimchy (shay.banon)
|
||||||
*/
|
*/
|
||||||
@ -49,7 +51,60 @@ public class LocalGatewayNodeAllocation extends NodeAllocation {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override public void applyFailedShards(NodeAllocations nodeAllocations, RoutingNodes routingNodes, DiscoveryNodes nodes, List<? extends ShardRouting> failedShards) {
|
@Override public void applyFailedShards(NodeAllocations nodeAllocations, RoutingNodes routingNodes, DiscoveryNodes nodes, List<? extends ShardRouting> failedShards) {
|
||||||
// TODO when a shard failed and we in the initial allocation, find an existing one
|
for (ShardRouting failedShard : failedShards) {
|
||||||
|
IndexRoutingTable indexRoutingTable = routingNodes.routingTable().index(failedShard.index());
|
||||||
|
if (!routingNodes.blocks().hasIndexBlock(indexRoutingTable.index(), LocalGateway.INDEX_NOT_RECOVERED_BLOCK)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// we are still in the initial allocation, find another node with existing shards
|
||||||
|
// all primary are unassigned for the index, see if we can allocate it on existing nodes, if not, don't assign
|
||||||
|
Set<String> nodesIds = Sets.newHashSet();
|
||||||
|
nodesIds.addAll(nodes.dataNodes().keySet());
|
||||||
|
nodesIds.addAll(nodes.masterNodes().keySet());
|
||||||
|
TransportNodesListGatewayState.NodesLocalGatewayState nodesState = listGatewayState.list(nodesIds, null).actionGet();
|
||||||
|
|
||||||
|
// make a list of ShardId to Node, each one from the latest version
|
||||||
|
Tuple<DiscoveryNode, Long> t = null;
|
||||||
|
for (TransportNodesListGatewayState.NodeLocalGatewayState nodeState : nodesState) {
|
||||||
|
// we don't want to reallocate to the node we failed on
|
||||||
|
if (nodeState.node().id().equals(failedShard.currentNodeId())) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
// go and find
|
||||||
|
for (Map.Entry<ShardId, Long> entry : nodeState.state().shards().entrySet()) {
|
||||||
|
if (entry.getKey().equals(failedShard.shardId())) {
|
||||||
|
if (t == null || entry.getValue() > t.v2().longValue()) {
|
||||||
|
t = new Tuple<DiscoveryNode, Long>(nodeState.node(), entry.getValue());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (t != null) {
|
||||||
|
// we found a node to allocate to, do it
|
||||||
|
RoutingNode currentRoutingNode = routingNodes.nodesToShards().get(failedShard.currentNodeId());
|
||||||
|
if (currentRoutingNode == null) {
|
||||||
|
// already failed (might be called several times for the same shard)
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// find the shard and cancel relocation
|
||||||
|
Iterator<MutableShardRouting> shards = currentRoutingNode.iterator();
|
||||||
|
while (shards.hasNext()) {
|
||||||
|
MutableShardRouting shard = shards.next();
|
||||||
|
if (shard.shardId().equals(failedShard.shardId())) {
|
||||||
|
shard.deassignNode();
|
||||||
|
shards.remove();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
RoutingNode targetNode = routingNodes.nodesToShards().get(t.v1().id());
|
||||||
|
targetNode.add(new MutableShardRouting(failedShard.index(), failedShard.id(),
|
||||||
|
targetNode.nodeId(), failedShard.relocatingNodeId(),
|
||||||
|
failedShard.primary(), INITIALIZING));
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override public boolean allocateUnassigned(NodeAllocations nodeAllocations, RoutingNodes routingNodes, DiscoveryNodes nodes) {
|
@Override public boolean allocateUnassigned(NodeAllocations nodeAllocations, RoutingNodes routingNodes, DiscoveryNodes nodes) {
|
||||||
|
@ -19,6 +19,8 @@
|
|||||||
|
|
||||||
package org.elasticsearch.index.translog.fs;
|
package org.elasticsearch.index.translog.fs;
|
||||||
|
|
||||||
|
import org.elasticsearch.common.io.FileSystemUtils;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.FileNotFoundException;
|
import java.io.FileNotFoundException;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
@ -60,6 +62,8 @@ public class RafReference {
|
|||||||
raf.close();
|
raf.close();
|
||||||
if (delete) {
|
if (delete) {
|
||||||
file.delete();
|
file.delete();
|
||||||
|
} else {
|
||||||
|
FileSystemUtils.syncFile(file);
|
||||||
}
|
}
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
// ignore
|
// ignore
|
||||||
|
Loading…
x
Reference in New Issue
Block a user