auto_expand_replicas: [0-auto] can cause data loss when nodes are removed, closes #934.
The data loss is caused by a race condition between handling the removed node (which promotes a replica to primary) and removing that same replica because of the 0-auto setting.
This commit is contained in:
parent
c9aca9c6de
commit
518488b0b2
|
@ -16,6 +16,16 @@
|
|||
<option name="JD_P_AT_EMPTY_LINES" value="false" />
|
||||
<option name="METHOD_ANNOTATION_WRAP" value="0" />
|
||||
<option name="FIELD_ANNOTATION_WRAP" value="0" />
|
||||
<ADDITIONAL_INDENT_OPTIONS fileType="css">
|
||||
<option name="INDENT_SIZE" value="4" />
|
||||
<option name="CONTINUATION_INDENT_SIZE" value="8" />
|
||||
<option name="TAB_SIZE" value="4" />
|
||||
<option name="USE_TAB_CHARACTER" value="false" />
|
||||
<option name="SMART_TABS" value="false" />
|
||||
<option name="LABEL_INDENT_SIZE" value="0" />
|
||||
<option name="LABEL_INDENT_ABSOLUTE" value="false" />
|
||||
<option name="USE_RELATIVE_INDENTS" value="false" />
|
||||
</ADDITIONAL_INDENT_OPTIONS>
|
||||
<ADDITIONAL_INDENT_OPTIONS fileType="groovy">
|
||||
<option name="INDENT_SIZE" value="4" />
|
||||
<option name="CONTINUATION_INDENT_SIZE" value="8" />
|
||||
|
@ -36,6 +46,16 @@
|
|||
<option name="LABEL_INDENT_ABSOLUTE" value="false" />
|
||||
<option name="USE_RELATIVE_INDENTS" value="false" />
|
||||
</ADDITIONAL_INDENT_OPTIONS>
|
||||
<ADDITIONAL_INDENT_OPTIONS fileType="haml">
|
||||
<option name="INDENT_SIZE" value="2" />
|
||||
<option name="CONTINUATION_INDENT_SIZE" value="8" />
|
||||
<option name="TAB_SIZE" value="4" />
|
||||
<option name="USE_TAB_CHARACTER" value="false" />
|
||||
<option name="SMART_TABS" value="false" />
|
||||
<option name="LABEL_INDENT_SIZE" value="0" />
|
||||
<option name="LABEL_INDENT_ABSOLUTE" value="false" />
|
||||
<option name="USE_RELATIVE_INDENTS" value="false" />
|
||||
</ADDITIONAL_INDENT_OPTIONS>
|
||||
<ADDITIONAL_INDENT_OPTIONS fileType="java">
|
||||
<option name="INDENT_SIZE" value="4" />
|
||||
<option name="CONTINUATION_INDENT_SIZE" value="8" />
|
||||
|
@ -126,6 +146,11 @@
|
|||
<option name="LABEL_INDENT_ABSOLUTE" value="false" />
|
||||
<option name="USE_RELATIVE_INDENTS" value="false" />
|
||||
</ADDITIONAL_INDENT_OPTIONS>
|
||||
<codeStyleSettings language="JavaScript">
|
||||
<option name="METHOD_ANNOTATION_WRAP" value="0" />
|
||||
<option name="FIELD_ANNOTATION_WRAP" value="0" />
|
||||
<option name="PARENT_SETTINGS_INSTALLED" value="true" />
|
||||
</codeStyleSettings>
|
||||
</value>
|
||||
</option>
|
||||
<option name="USE_PER_PROJECT_SETTINGS" value="true" />
|
||||
|
|
|
@ -54,6 +54,11 @@ public interface ClusterService extends LifecycleComponent<ClusterService> {
|
|||
*/
|
||||
OperationRouting operationRouting();
|
||||
|
||||
/**
|
||||
* Adds a priority listener for updated cluster states.
|
||||
*/
|
||||
void addPriority(ClusterStateListener listener);
|
||||
|
||||
/**
|
||||
* Adds a listener for updated cluster states.
|
||||
*/
|
||||
|
|
|
@ -39,6 +39,8 @@ import static org.elasticsearch.common.unit.TimeValue.*;
|
|||
*/
|
||||
public class RoutingService extends AbstractLifecycleComponent<RoutingService> implements ClusterStateListener {
|
||||
|
||||
private static final String CLUSTER_UPDATE_TASK_SOURCE = "routing-table-updater";
|
||||
|
||||
private final ThreadPool threadPool;
|
||||
|
||||
private final ClusterService clusterService;
|
||||
|
@ -60,7 +62,7 @@ public class RoutingService extends AbstractLifecycleComponent<RoutingService> i
|
|||
}
|
||||
|
||||
@Override protected void doStart() throws ElasticSearchException {
|
||||
clusterService.add(this);
|
||||
clusterService.addPriority(this);
|
||||
}
|
||||
|
||||
@Override protected void doStop() throws ElasticSearchException {
|
||||
|
@ -75,7 +77,7 @@ public class RoutingService extends AbstractLifecycleComponent<RoutingService> i
|
|||
}
|
||||
|
||||
@Override public void clusterChanged(ClusterChangedEvent event) {
|
||||
if (event.source().equals(RoutingTableUpdater.CLUSTER_UPDATE_TASK_SOURCE)) {
|
||||
if (event.source().equals(CLUSTER_UPDATE_TASK_SOURCE)) {
|
||||
// that's us, ignore this event
|
||||
return;
|
||||
}
|
||||
|
@ -93,7 +95,7 @@ public class RoutingService extends AbstractLifecycleComponent<RoutingService> i
|
|||
// also, if the routing table changed, it means that we have new indices, or shard have started
|
||||
// or failed, we want to apply this as fast as possible
|
||||
routingTableDirty = true;
|
||||
threadPool.cached().execute(new RoutingTableUpdater());
|
||||
reroute();
|
||||
} else {
|
||||
if (event.nodesAdded()) {
|
||||
routingTableDirty = true;
|
||||
|
@ -107,32 +109,34 @@ public class RoutingService extends AbstractLifecycleComponent<RoutingService> i
|
|||
}
|
||||
}
|
||||
|
||||
private void reroute() {
|
||||
try {
|
||||
if (!routingTableDirty) {
|
||||
return;
|
||||
}
|
||||
if (lifecycle.stopped()) {
|
||||
return;
|
||||
}
|
||||
clusterService.submitStateUpdateTask(CLUSTER_UPDATE_TASK_SOURCE, new ClusterStateUpdateTask() {
|
||||
@Override public ClusterState execute(ClusterState currentState) {
|
||||
RoutingAllocation.Result routingResult = shardsAllocation.reroute(currentState);
|
||||
if (!routingResult.changed()) {
|
||||
// no state changed
|
||||
return currentState;
|
||||
}
|
||||
return newClusterStateBuilder().state(currentState).routingResult(routingResult).build();
|
||||
}
|
||||
});
|
||||
routingTableDirty = false;
|
||||
} catch (Exception e) {
|
||||
logger.warn("Failed to reroute routing table", e);
|
||||
}
|
||||
}
|
||||
|
||||
private class RoutingTableUpdater implements Runnable {
|
||||
|
||||
private static final String CLUSTER_UPDATE_TASK_SOURCE = "routing-table-updater";
|
||||
|
||||
@Override public void run() {
|
||||
try {
|
||||
if (!routingTableDirty) {
|
||||
return;
|
||||
}
|
||||
if (lifecycle.stopped()) {
|
||||
return;
|
||||
}
|
||||
clusterService.submitStateUpdateTask(CLUSTER_UPDATE_TASK_SOURCE, new ClusterStateUpdateTask() {
|
||||
@Override public ClusterState execute(ClusterState currentState) {
|
||||
RoutingAllocation.Result routingResult = shardsAllocation.reroute(currentState);
|
||||
if (!routingResult.changed()) {
|
||||
// no state changed
|
||||
return currentState;
|
||||
}
|
||||
return newClusterStateBuilder().state(currentState).routingResult(routingResult).build();
|
||||
}
|
||||
});
|
||||
routingTableDirty = false;
|
||||
} catch (Exception e) {
|
||||
logger.warn("Failed to reroute routing table", e);
|
||||
}
|
||||
reroute();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -65,6 +65,7 @@ public class InternalClusterService extends AbstractLifecycleComponent<ClusterSe
|
|||
|
||||
private volatile ExecutorService updateTasksExecutor;
|
||||
|
||||
private final List<ClusterStateListener> priorityClusterStateListeners = new CopyOnWriteArrayList<ClusterStateListener>();
|
||||
private final List<ClusterStateListener> clusterStateListeners = new CopyOnWriteArrayList<ClusterStateListener>();
|
||||
|
||||
private final Queue<NotifyTimeout> onGoingTimeouts = new LinkedTransferQueue<NotifyTimeout>();
|
||||
|
@ -127,13 +128,17 @@ public class InternalClusterService extends AbstractLifecycleComponent<ClusterSe
|
|||
return this.clusterState;
|
||||
}
|
||||
|
||||
public void addPriority(ClusterStateListener listener) {
|
||||
priorityClusterStateListeners.add(listener);
|
||||
}
|
||||
|
||||
public void add(ClusterStateListener listener) {
|
||||
clusterStateListeners.add(listener);
|
||||
}
|
||||
|
||||
public void remove(ClusterStateListener listener) {
|
||||
clusterStateListeners.remove(listener);
|
||||
for (Iterator<NotifyTimeout> it = onGoingTimeouts.iterator(); it.hasNext();) {
|
||||
for (Iterator<NotifyTimeout> it = onGoingTimeouts.iterator(); it.hasNext(); ) {
|
||||
NotifyTimeout timeout = it.next();
|
||||
if (timeout.listener.equals(listener)) {
|
||||
timeout.cancel();
|
||||
|
@ -226,6 +231,9 @@ public class InternalClusterService extends AbstractLifecycleComponent<ClusterSe
|
|||
}
|
||||
}
|
||||
|
||||
for (ClusterStateListener listener : priorityClusterStateListeners) {
|
||||
listener.clusterChanged(clusterChangedEvent);
|
||||
}
|
||||
for (ClusterStateListener listener : clusterStateListeners) {
|
||||
listener.clusterChanged(clusterChangedEvent);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue