/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.elasticsearch.gateway;

import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import org.apache.lucene.util.IOUtils;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.ElasticsearchIllegalArgumentException;
import org.elasticsearch.ElasticsearchIllegalStateException;
import org.elasticsearch.Version;
import org.elasticsearch.cluster.ClusterChangedEvent;
import org.elasticsearch.cluster.ClusterState;
import org.elasticsearch.cluster.ClusterStateListener;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.cluster.metadata.MetaData;
import org.elasticsearch.cluster.node.DiscoveryNode;
import org.elasticsearch.cluster.routing.DjbHashFunction;
import org.elasticsearch.cluster.routing.HashFunction;
import org.elasticsearch.cluster.routing.SimpleHashFunction;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.component.AbstractComponent;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.common.util.concurrent.ConcurrentCollections;
import org.elasticsearch.common.util.concurrent.FutureUtils;
import org.elasticsearch.common.xcontent.ToXContent;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.env.NodeEnvironment;
import org.elasticsearch.env.ShardLock;
import org.elasticsearch.index.Index;
import org.elasticsearch.indices.IndicesService;
import org.elasticsearch.threadpool.ThreadPool;
import java.io.IOException;
import java.nio.file.DirectoryStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ScheduledFuture;
import java.util.regex.Pattern;

/**
 * Persists the cluster {@link MetaData} on disk. When the local node is the elected master, both the global state
 * and the per-index state are written whenever they change, and on master eligible nodes the previously persisted
 * state is loaded again on startup. Nodes that hold data on disk additionally detect "dangling" indices, i.e.
 * indices that exist on the local file system but are not part of the cluster metadata, and either import them
 * back into the cluster state or schedule their deletion.
 */
public class GatewayMetaState extends AbstractComponent implements ClusterStateListener {

    static final String GLOBAL_STATE_FILE_PREFIX = "global-";
    private static final String INDEX_STATE_FILE_PREFIX = "state-";
    static final Pattern GLOBAL_STATE_FILE_PATTERN = Pattern.compile(GLOBAL_STATE_FILE_PREFIX + "(\\d+)(" + MetaDataStateFormat.STATE_FILE_EXTENSION + ")?");
    static final Pattern INDEX_STATE_FILE_PATTERN = Pattern.compile(INDEX_STATE_FILE_PREFIX + "(\\d+)(" + MetaDataStateFormat.STATE_FILE_EXTENSION + ")?");
    private static final String GLOBAL_STATE_LOG_TYPE = "[_global]";
    private static final String DEPRECATED_SETTING_ROUTING_HASH_FUNCTION = "cluster.routing.operation.hash.type";
    private static final String DEPRECATED_SETTING_ROUTING_USE_TYPE = "cluster.routing.operation.use_type";

    public static final String GATEWAY_DANGLING_TIMEOUT = "gateway.dangling_timeout";
    public static final String GATEWAY_DELETE_TIMEOUT = "gateway.delete_timeout";
    public static final String GATEWAY_AUTO_IMPORT_DANGLED = "gateway.auto_import_dangled";
    // legacy - this used to be in a different package
    private static final String GATEWAY_LOCAL_DANGLING_TIMEOUT = "gateway.local.dangling_timeout";
    private static final String GATEWAY_LOCAL_AUTO_IMPORT_DANGLED = "gateway.local.auto_import_dangled";
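
    /**
     * Controls whether dangling indices found on disk are automatically imported back into the cluster state:
     * {@code NO} never imports them, {@code YES} imports them in their persisted state, and {@code CLOSED}
     * imports them but forces them into the closed state.
     */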
    static enum AutoImportDangledState {
        NO() {
            @Override
            public boolean shouldImport() {
                return false;
            }
        },
        YES() {
            @Override
            public boolean shouldImport() {
                return true;
            }
        },
        CLOSED() {
            @Override
            public boolean shouldImport() {
                return true;
            }
        };

        public abstract boolean shouldImport();

        public static AutoImportDangledState fromString(String value) {
            if ("no".equalsIgnoreCase(value)) {
                return NO;
            } else if ("yes".equalsIgnoreCase(value)) {
                return YES;
            } else if ("closed".equalsIgnoreCase(value)) {
                return CLOSED;
            } else {
                throw new ElasticsearchIllegalArgumentException("failed to parse [" + value + "], not a valid auto dangling import type");
            }
        }
    }
    private final NodeEnvironment nodeEnv;
    private final ThreadPool threadPool;

    private final LocalAllocateDangledIndices allocateDangledIndices;

    @Nullable
    private volatile MetaData currentMetaData;

    private final XContentType format;
    private final ToXContent.Params formatParams;
    private final ToXContent.Params gatewayModeFormatParams;

    private final AutoImportDangledState autoImportDangled;
    private final TimeValue danglingTimeout;
    private final TimeValue deleteTimeout;
    private final Map<String, DanglingIndex> danglingIndices = ConcurrentCollections.newConcurrentMap();
    private final Object danglingMutex = new Object();

    private final IndicesService indicesService;
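
    /**
     * Creates the gateway meta state component. On master eligible nodes this also rejects state written before
     * 0.19, upgrades pre-2.0 routing settings and loads the currently persisted metadata, rethrowing any failure
     * to read it.
     */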
    @Inject
    public GatewayMetaState(Settings settings, ThreadPool threadPool, NodeEnvironment nodeEnv,
                            TransportNodesListGatewayMetaState nodesListGatewayMetaState, LocalAllocateDangledIndices allocateDangledIndices,
                            IndicesService indicesService) throws Exception {
        super(settings);
        this.nodeEnv = nodeEnv;
        this.threadPool = threadPool;
        this.format = XContentType.fromRestContentType(settings.get("format", "smile"));
        this.allocateDangledIndices = allocateDangledIndices;
        nodesListGatewayMetaState.init(this);

        if (this.format == XContentType.SMILE) {
            Map<String, String> params = Maps.newHashMap();
            params.put("binary", "true");
            formatParams = new ToXContent.MapParams(params);
            Map<String, String> gatewayModeParams = Maps.newHashMap();
            gatewayModeParams.put("binary", "true");
            gatewayModeParams.put(MetaData.CONTEXT_MODE_PARAM, MetaData.CONTEXT_MODE_GATEWAY);
            gatewayModeFormatParams = new ToXContent.MapParams(gatewayModeParams);
        } else {
            formatParams = ToXContent.EMPTY_PARAMS;
            Map<String, String> gatewayModeParams = Maps.newHashMap();
            gatewayModeParams.put(MetaData.CONTEXT_MODE_PARAM, MetaData.CONTEXT_MODE_GATEWAY);
            gatewayModeFormatParams = new ToXContent.MapParams(gatewayModeParams);
        }

        this.autoImportDangled = AutoImportDangledState.fromString(settings.get(GATEWAY_AUTO_IMPORT_DANGLED, settings.get(GATEWAY_LOCAL_AUTO_IMPORT_DANGLED, AutoImportDangledState.YES.toString())));
        this.danglingTimeout = settings.getAsTime(GATEWAY_DANGLING_TIMEOUT, settings.getAsTime(GATEWAY_LOCAL_DANGLING_TIMEOUT, TimeValue.timeValueHours(2)));
        this.deleteTimeout = settings.getAsTime(GATEWAY_DELETE_TIMEOUT, TimeValue.timeValueSeconds(30));

        logger.debug("using {} [{}], {} [{}], with {} [{}]",
                GATEWAY_AUTO_IMPORT_DANGLED, this.autoImportDangled,
                GATEWAY_DELETE_TIMEOUT, this.deleteTimeout,
                GATEWAY_DANGLING_TIMEOUT, this.danglingTimeout);

        if (DiscoveryNode.masterNode(settings) || DiscoveryNode.dataNode(settings)) {
            nodeEnv.ensureAtomicMoveSupported();
        }
        if (DiscoveryNode.masterNode(settings)) {
            try {
                ensureNoPre019State();
                pre20Upgrade();
                long start = System.currentTimeMillis();
                loadState();
                logger.debug("took {} to load state", TimeValue.timeValueMillis(System.currentTimeMillis() - start));
            } catch (Exception e) {
                logger.error("failed to read local state, exiting...", e);
                throw e;
            }
        }
        this.indicesService = indicesService;
    }
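
    /**
     * Loads and returns the last persisted {@link MetaData}: the global state merged with the per-index state of
     * every index found on disk.
     */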
    public MetaData loadMetaState() throws Exception {
        return loadState();
    }
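
    /**
     * Persists changes to the global and per-index metadata when the local node is the elected master, and handles
     * dangling indices on every node that has a node file on disk.
     */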
    @Override
    public void clusterChanged(ClusterChangedEvent event) {
        final ClusterState state = event.state();
        if (state.blocks().disableStatePersistence()) {
            // reset the current metadata, we need to start fresh...
            this.currentMetaData = null;
            return;
        }

        MetaData newMetaData = state.metaData();
        // we don't check if metaData changed, since we might be called several times and we need to check dangling...
        boolean success = true;
        // only applied to master node, writing the global and index level states
        if (state.nodes().localNode().masterNode()) {
            // check if the global state changed?
            if (currentMetaData == null || !MetaData.isGlobalStateEquals(currentMetaData, newMetaData)) {
                try {
                    writeGlobalState("changed", newMetaData);
                } catch (Throwable e) {
                    success = false;
                }
            }
            // check and write changes in indices
            for (IndexMetaData indexMetaData : newMetaData) {
                String writeReason = null;
                IndexMetaData currentIndexMetaData;
                if (currentMetaData == null) {
                    // a new event..., check from the state stored
                    try {
                        currentIndexMetaData = loadIndexState(indexMetaData.index());
                    } catch (IOException ex) {
                        throw new ElasticsearchException("failed to load index state", ex);
                    }
                } else {
                    currentIndexMetaData = currentMetaData.index(indexMetaData.index());
                }
                if (currentIndexMetaData == null) {
                    writeReason = "freshly created";
                } else if (currentIndexMetaData.version() != indexMetaData.version()) {
                    writeReason = "version changed from [" + currentIndexMetaData.version() + "] to [" + indexMetaData.version() + "]";
                }

                // we update the writeReason only if we really need to write it
                if (writeReason == null) {
                    continue;
                }

                try {
                    writeIndex(writeReason, indexMetaData, currentIndexMetaData);
                } catch (Throwable e) {
                    success = false;
                }
            }
        }

        // handle dangling indices, we handle those for all nodes that have a node file (data or master)
        if (nodeEnv.hasNodeFile()) {
            if (danglingTimeout.millis() >= 0) {
                synchronized (danglingMutex) {
                    for (String danglingIndex : danglingIndices.keySet()) {
                        if (newMetaData.hasIndex(danglingIndex)) {
                            logger.debug("[{}] no longer dangling (created), removing", danglingIndex);
                            DanglingIndex removed = danglingIndices.remove(danglingIndex);
                            FutureUtils.cancel(removed.future);
                        }
                    }
                    // delete indices that are no longer part of the metadata
                    try {
                        for (String indexName : nodeEnv.findAllIndices()) {
                            // if we have the index on the metadata, don't delete it
                            if (newMetaData.hasIndex(indexName)) {
                                continue;
                            }
                            if (danglingIndices.containsKey(indexName)) {
                                // already dangling, continue
                                continue;
                            }
                            final IndexMetaData indexMetaData = loadIndexState(indexName);
                            if (indexMetaData != null) {
                                if (autoImportDangled.shouldImport()) {
                                    logger.info("[{}] dangling index, exists on local file system, but not in cluster metadata, auto import to cluster state [{}]", indexName, autoImportDangled);
                                    danglingIndices.put(indexName, new DanglingIndex(indexName, null));
                                } else if (danglingTimeout.millis() == 0) {
                                    logger.info("[{}] dangling index, exists on local file system, but not in cluster metadata, timeout set to 0, deleting now", indexName);
                                    indicesService.deleteIndexStore("dangling index with timeout set to 0", indexMetaData);
                                } else {
                                    logger.info("[{}] dangling index, exists on local file system, but not in cluster metadata, scheduling to delete in [{}], auto import to cluster state [{}]", indexName, danglingTimeout, autoImportDangled);
                                    danglingIndices.put(indexName,
                                            new DanglingIndex(indexName,
                                                    threadPool.schedule(danglingTimeout,
                                                            ThreadPool.Names.SAME,
                                                            new RemoveDanglingIndex(indexMetaData))));
                                }
                            }
                        }
                    } catch (Throwable e) {
                        logger.warn("failed to find dangling indices", e);
                    }
                }
            }
            if (autoImportDangled.shouldImport() && !danglingIndices.isEmpty()) {
                final List<IndexMetaData> dangled = Lists.newArrayList();
                for (String indexName : danglingIndices.keySet()) {
                    IndexMetaData indexMetaData;
                    try {
                        indexMetaData = loadIndexState(indexName);
                    } catch (IOException ex) {
                        throw new ElasticsearchException("failed to load index state", ex);
                    }
                    if (indexMetaData == null) {
                        logger.debug("failed to find state for dangling index [{}]", indexName);
                        continue;
                    }
                    // we might have someone copying over an index, renaming the directory, handle that
                    if (!indexMetaData.index().equals(indexName)) {
                        logger.info("dangled index directory name is [{}], state name is [{}], renaming to directory name", indexName, indexMetaData.index());
                        indexMetaData = IndexMetaData.builder(indexMetaData).index(indexName).build();
                    }
                    if (autoImportDangled == AutoImportDangledState.CLOSED) {
                        indexMetaData = IndexMetaData.builder(indexMetaData).state(IndexMetaData.State.CLOSE).build();
                    }
                    if (indexMetaData != null) {
                        dangled.add(indexMetaData);
                    }
                }
                IndexMetaData[] dangledIndices = dangled.toArray(new IndexMetaData[dangled.size()]);
                try {
                    allocateDangledIndices.allocateDangled(dangledIndices, new LocalAllocateDangledIndices.Listener() {
                        @Override
                        public void onResponse(LocalAllocateDangledIndices.AllocateDangledResponse response) {
                            logger.trace("allocated dangled");
                        }

                        @Override
                        public void onFailure(Throwable e) {
                            logger.info("failed to send allocated dangled", e);
                        }
                    });
                } catch (Throwable e) {
                    logger.warn("failed to send allocate dangled", e);
                }
            }
        }

        if (success) {
            currentMetaData = newMetaData;
        }
    }

    /**
     * Returns a StateFormat that can read and write {@link MetaData}
     */
    static MetaDataStateFormat<MetaData> globalStateFormat(XContentType format, final ToXContent.Params formatParams, final boolean deleteOldFiles) {
        return new MetaDataStateFormat<MetaData>(format, deleteOldFiles) {

            @Override
            public void toXContent(XContentBuilder builder, MetaData state) throws IOException {
                MetaData.Builder.toXContent(state, builder, formatParams);
            }

            @Override
            public MetaData fromXContent(XContentParser parser) throws IOException {
                return MetaData.Builder.fromXContent(parser);
            }
        };
    }

    /**
     * Returns a StateFormat that can read and write {@link IndexMetaData}
     */
    static MetaDataStateFormat<IndexMetaData> indexStateFormat(XContentType format, final ToXContent.Params formatParams, boolean deleteOldFiles) {
        return new MetaDataStateFormat<IndexMetaData>(format, deleteOldFiles) {

            @Override
            public void toXContent(XContentBuilder builder, IndexMetaData state) throws IOException {
                IndexMetaData.Builder.toXContent(state, builder, formatParams);
            }

            @Override
            public IndexMetaData fromXContent(XContentParser parser) throws IOException {
                return IndexMetaData.Builder.fromXContent(parser);
            }
        };
    }
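
    /**
     * Writes the state of a single index to all of its index paths, deleting older state files once the index
     * metadata version has changed.
     */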
    private void writeIndex(String reason, IndexMetaData indexMetaData, @Nullable IndexMetaData previousIndexMetaData) throws Exception {
        logger.trace("[{}] writing state, reason [{}]", indexMetaData.index(), reason);
        final boolean deleteOldFiles = previousIndexMetaData != null && previousIndexMetaData.version() != indexMetaData.version();
        final MetaDataStateFormat<IndexMetaData> writer = indexStateFormat(format, formatParams, deleteOldFiles);
        try {
            writer.write(indexMetaData, INDEX_STATE_FILE_PREFIX, indexMetaData.version(),
                    nodeEnv.indexPaths(new Index(indexMetaData.index())));
        } catch (Throwable ex) {
            logger.warn("[{}]: failed to write index state", ex, indexMetaData.index());
            throw new IOException("failed to write state for [" + indexMetaData.index() + "]", ex);
        }
    }
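
    /**
     * Writes the global (cluster wide) metadata to every node data path, replacing any previously written global
     * state files.
     */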
    private void writeGlobalState(String reason, MetaData metaData) throws Exception {
        logger.trace("{} writing state, reason [{}]", GLOBAL_STATE_LOG_TYPE, reason);
        final MetaDataStateFormat<MetaData> writer = globalStateFormat(format, gatewayModeFormatParams, true);
        try {
            writer.write(metaData, GLOBAL_STATE_FILE_PREFIX, metaData.version(), nodeEnv.nodeDataPaths());
        } catch (Throwable ex) {
            logger.warn("{}: failed to write global state", ex, GLOBAL_STATE_LOG_TYPE);
            throw new IOException("failed to write global state", ex);
        }
    }
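
    /**
     * Loads the persisted global state and merges in the state of every index found on disk.
     */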
    private MetaData loadState() throws Exception {
        MetaData globalMetaData = loadGlobalState();
        MetaData.Builder metaDataBuilder;
        if (globalMetaData != null) {
            metaDataBuilder = MetaData.builder(globalMetaData);
        } else {
            metaDataBuilder = MetaData.builder();
        }

        final Set<String> indices = nodeEnv.findAllIndices();
        for (String index : indices) {
            IndexMetaData indexMetaData = loadIndexState(index);
            if (indexMetaData == null) {
                logger.debug("[{}] failed to find metadata for existing index location", index);
            } else {
                metaDataBuilder.put(indexMetaData, false);
            }
        }
        return metaDataBuilder.build();
    }
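
    /**
     * Loads the latest persisted state of a single index from its index paths, or {@code null} if no state file
     * could be found.
     */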
    @Nullable
    private IndexMetaData loadIndexState(String index) throws IOException {
        return MetaDataStateFormat.loadLatestState(logger, indexStateFormat(format, formatParams, true),
                INDEX_STATE_FILE_PATTERN, "[" + index + "]", nodeEnv.indexPaths(new Index(index)));
    }
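
    /**
     * Loads the latest persisted global metadata from the node data paths, or {@code null} if no global state has
     * been written yet.
     */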
    private MetaData loadGlobalState() throws IOException {
        return MetaDataStateFormat.loadLatestState(logger, globalStateFormat(format, gatewayModeFormatParams, true), GLOBAL_STATE_FILE_PATTERN, GLOBAL_STATE_LOG_TYPE, nodeEnv.nodeDataPaths());
    }

    /**
     * Throws an {@link ElasticsearchIllegalStateException} if a pre 0.19 state is detected
     */
    private void ensureNoPre019State() throws Exception {
        for (Path dataLocation : nodeEnv.nodeDataPaths()) {
            final Path stateLocation = dataLocation.resolve(MetaDataStateFormat.STATE_DIR_NAME);
            if (!Files.exists(stateLocation)) {
                continue;
            }
            try (DirectoryStream<Path> stream = Files.newDirectoryStream(stateLocation)) {
                for (Path stateFile : stream) {
                    if (logger.isTraceEnabled()) {
                        logger.trace("[upgrade]: processing [" + stateFile.getFileName() + "]");
                    }
                    final String name = stateFile.getFileName().toString();
                    if (name.startsWith("metadata-")) {
                        throw new ElasticsearchIllegalStateException("Detected pre 0.19 metadata file, please upgrade to a version before "
                                + Version.CURRENT.minimumCompatibilityVersion()
                                + " first to upgrade state structures - metadata found: [" + stateFile.getParent().toAbsolutePath());
                    }
                }
            }
        }
    }

    /**
     * Elasticsearch 2.0 deprecated custom routing hash functions: new indices route documents to shards with
     * murmur3, which distributes documents more evenly than the old djb2-based default for some shard counts.
     * For indices created before 2.0 we move the old, deprecated node level hash function setting to an index
     * setting so that those indices stay backward compatible.
     */
    private void pre20Upgrade() throws Exception {
        final Class<? extends HashFunction> pre20HashFunction;
        final String pre20HashFunctionName = settings.get(DEPRECATED_SETTING_ROUTING_HASH_FUNCTION, null);
        final boolean hasCustomPre20HashFunction = pre20HashFunctionName != null;
        // the hash function package has changed, so we replace the two built-in hash functions if their fully qualified name is used
        if (hasCustomPre20HashFunction) {
            switch (pre20HashFunctionName) {
                case "org.elasticsearch.cluster.routing.operation.hash.simple.SimpleHashFunction":
                    pre20HashFunction = SimpleHashFunction.class;
                    break;
                case "org.elasticsearch.cluster.routing.operation.hash.djb.DjbHashFunction":
                    pre20HashFunction = DjbHashFunction.class;
                    break;
                default:
                    pre20HashFunction = settings.getAsClass(DEPRECATED_SETTING_ROUTING_HASH_FUNCTION, DjbHashFunction.class, "org.elasticsearch.cluster.routing.", "HashFunction");
            }
        } else {
            pre20HashFunction = DjbHashFunction.class;
        }
        final Boolean pre20UseType = settings.getAsBoolean(DEPRECATED_SETTING_ROUTING_USE_TYPE, null);
        MetaData metaData = loadMetaState();
        for (IndexMetaData indexMetaData : metaData) {
            if (indexMetaData.settings().get(IndexMetaData.SETTING_LEGACY_ROUTING_HASH_FUNCTION) == null
                    && indexMetaData.getCreationVersion().before(Version.V_2_0_0)) {
                // these settings need an upgrade
                Settings indexSettings = ImmutableSettings.builder().put(indexMetaData.settings())
                        .put(IndexMetaData.SETTING_LEGACY_ROUTING_HASH_FUNCTION, pre20HashFunction)
                        .put(IndexMetaData.SETTING_LEGACY_ROUTING_USE_TYPE, pre20UseType == null ? false : pre20UseType)
                        .build();
                IndexMetaData newMetaData = IndexMetaData.builder(indexMetaData)
                        .version(indexMetaData.version())
                        .settings(indexSettings)
                        .build();
                writeIndex("upgrade", newMetaData, null);
            } else if (indexMetaData.getCreationVersion().onOrAfter(Version.V_2_0_0)) {
                if (indexMetaData.getSettings().get(IndexMetaData.SETTING_LEGACY_ROUTING_HASH_FUNCTION) != null
                        || indexMetaData.getSettings().get(IndexMetaData.SETTING_LEGACY_ROUTING_USE_TYPE) != null) {
                    throw new ElasticsearchIllegalStateException("Indices created on or after 2.0 should NOT contain [" + IndexMetaData.SETTING_LEGACY_ROUTING_HASH_FUNCTION
                            + "] or [" + IndexMetaData.SETTING_LEGACY_ROUTING_USE_TYPE + "] in their index settings");
                }
            }
        }
        if (hasCustomPre20HashFunction || pre20UseType != null) {
            logger.warn("Settings [{}] and [{}] are deprecated. Index settings from your old indices have been updated to record the fact that they "
                    + "used some custom routing logic, you can now remove these settings from your `elasticsearch.yml` file", DEPRECATED_SETTING_ROUTING_HASH_FUNCTION, DEPRECATED_SETTING_ROUTING_USE_TYPE);
        }
    }
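
    /**
     * Scheduled task that deletes the store of a dangling index once the dangling timeout has elapsed, unless the
     * index reappeared in the cluster metadata in the meantime.
     */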
    class RemoveDanglingIndex implements Runnable {

        private final IndexMetaData metaData;

        RemoveDanglingIndex(IndexMetaData metaData) {
            this.metaData = metaData;
        }

        @Override
        public void run() {
            synchronized (danglingMutex) {
                DanglingIndex remove = danglingIndices.remove(metaData.index());
                // no longer there...
                if (remove == null) {
                    return;
                }
                logger.warn("[{}] deleting dangling index", metaData.index());
                try {
                    indicesService.deleteIndexStore("deleting dangling index", metaData);
                } catch (Exception ex) {
                    logger.debug("failed to delete dangling index", ex);
                }
            }
        }
    }
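
    /**
     * Holds the name of a dangling index together with the scheduled future of its pending deletion, if any.
     */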
    static class DanglingIndex {
        public final String index;
        public final ScheduledFuture future;

        DanglingIndex(String index, ScheduledFuture future) {
            this.index = index;
            this.future = future;
        }
    }
}