diff --git a/buildSrc/src/main/resources/checkstyle_suppressions.xml b/buildSrc/src/main/resources/checkstyle_suppressions.xml index 07dd29a33ad..13b983c8a5e 100644 --- a/buildSrc/src/main/resources/checkstyle_suppressions.xml +++ b/buildSrc/src/main/resources/checkstyle_suppressions.xml @@ -441,7 +441,6 @@ - @@ -1117,7 +1116,6 @@ - @@ -1138,7 +1136,6 @@ - diff --git a/core/src/main/java/org/elasticsearch/bootstrap/Bootstrap.java b/core/src/main/java/org/elasticsearch/bootstrap/Bootstrap.java index 2a8984e59d4..2cb4fb6450a 100644 --- a/core/src/main/java/org/elasticsearch/bootstrap/Bootstrap.java +++ b/core/src/main/java/org/elasticsearch/bootstrap/Bootstrap.java @@ -135,6 +135,8 @@ final class Bootstrap { JNANatives.trySetMaxNumberOfThreads(); + JNANatives.trySetMaxSizeVirtualMemory(); + // init lucene random seed. it will use /dev/urandom where available: StringHelper.randomId(); } diff --git a/core/src/main/java/org/elasticsearch/bootstrap/BootstrapCheck.java b/core/src/main/java/org/elasticsearch/bootstrap/BootstrapCheck.java index 433dd4498a4..0a31da34c60 100644 --- a/core/src/main/java/org/elasticsearch/bootstrap/BootstrapCheck.java +++ b/core/src/main/java/org/elasticsearch/bootstrap/BootstrapCheck.java @@ -123,6 +123,7 @@ final class BootstrapCheck { if (Constants.LINUX) { checks.add(new MaxNumberOfThreadsCheck()); } + checks.add(new MaxSizeVirtualMemoryCheck()); return Collections.unmodifiableList(checks); } @@ -249,4 +250,27 @@ final class BootstrapCheck { } + static class MaxSizeVirtualMemoryCheck implements Check { + + @Override + public boolean check() { + return getMaxSizeVirtualMemory() != Long.MIN_VALUE && getMaxSizeVirtualMemory() != JNACLibrary.RLIM_INFINITY; + } + + @Override + public String errorMessage() { + return String.format( + Locale.ROOT, + "max size virtual memory [%d] for user [%s] likely too low, increase to [unlimited]", + getMaxSizeVirtualMemory(), + BootstrapInfo.getSystemProperties().get("user.name")); + } + + // visible for testing + long getMaxSizeVirtualMemory() { + return JNANatives.MAX_SIZE_VIRTUAL_MEMORY; + } + + } + } diff --git a/core/src/main/java/org/elasticsearch/bootstrap/JNACLibrary.java b/core/src/main/java/org/elasticsearch/bootstrap/JNACLibrary.java index 573f3d5be3e..5d1369b21f7 100644 --- a/core/src/main/java/org/elasticsearch/bootstrap/JNACLibrary.java +++ b/core/src/main/java/org/elasticsearch/bootstrap/JNACLibrary.java @@ -39,6 +39,7 @@ final class JNACLibrary { public static final int MCL_CURRENT = 1; public static final int ENOMEM = 12; public static final int RLIMIT_MEMLOCK = Constants.MAC_OS_X ? 6 : 8; + public static final int RLIMIT_AS = Constants.MAC_OS_X ? 5 : 9; public static final long RLIM_INFINITY = Constants.MAC_OS_X ? 
9223372036854775807L : -1L; static { diff --git a/core/src/main/java/org/elasticsearch/bootstrap/JNANatives.java b/core/src/main/java/org/elasticsearch/bootstrap/JNANatives.java index 0ea8da6a9be..e55d38a0f72 100644 --- a/core/src/main/java/org/elasticsearch/bootstrap/JNANatives.java +++ b/core/src/main/java/org/elasticsearch/bootstrap/JNANatives.java @@ -52,6 +52,8 @@ class JNANatives { // the user ID that owns the running Elasticsearch process static long MAX_NUMBER_OF_THREADS = -1; + static long MAX_SIZE_VIRTUAL_MEMORY = Long.MIN_VALUE; + static void tryMlockall() { int errno = Integer.MIN_VALUE; String errMsg = null; @@ -124,6 +126,17 @@ class JNANatives { } } + static void trySetMaxSizeVirtualMemory() { + if (Constants.LINUX || Constants.MAC_OS_X) { + final JNACLibrary.Rlimit rlimit = new JNACLibrary.Rlimit(); + if (JNACLibrary.getrlimit(JNACLibrary.RLIMIT_AS, rlimit) == 0) { + MAX_SIZE_VIRTUAL_MEMORY = rlimit.rlim_cur.longValue(); + } else { + logger.warn("unable to retrieve max size virtual memory [" + JNACLibrary.strerror(Native.getLastError()) + "]"); + } + } + } + static String rlimitToString(long value) { assert Constants.LINUX || Constants.MAC_OS_X; if (value == JNACLibrary.RLIM_INFINITY) { diff --git a/core/src/main/java/org/elasticsearch/cluster/metadata/MetaDataIndexUpgradeService.java b/core/src/main/java/org/elasticsearch/cluster/metadata/MetaDataIndexUpgradeService.java index 5a1f71936e5..dfd3dfc33fd 100644 --- a/core/src/main/java/org/elasticsearch/cluster/metadata/MetaDataIndexUpgradeService.java +++ b/core/src/main/java/org/elasticsearch/cluster/metadata/MetaDataIndexUpgradeService.java @@ -24,7 +24,6 @@ import org.elasticsearch.Version; import org.elasticsearch.common.component.AbstractComponent; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.settings.IndexScopedSettings; -import org.elasticsearch.common.settings.Setting; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.analysis.AnalysisService; @@ -34,7 +33,8 @@ import org.elasticsearch.index.similarity.SimilarityService; import org.elasticsearch.indices.mapper.MapperRegistry; import java.util.Collections; -import java.util.Map; + +import static org.elasticsearch.common.util.set.Sets.newHashSet; /** * This service is responsible for upgrading legacy index metadata to the current version @@ -47,13 +47,13 @@ import java.util.Map; public class MetaDataIndexUpgradeService extends AbstractComponent { private final MapperRegistry mapperRegistry; - private final IndexScopedSettings indexScopedSettigns; + private final IndexScopedSettings indexScopedSettings; @Inject public MetaDataIndexUpgradeService(Settings settings, MapperRegistry mapperRegistry, IndexScopedSettings indexScopedSettings) { super(settings); this.mapperRegistry = mapperRegistry; - this.indexScopedSettigns = indexScopedSettings; + this.indexScopedSettings = indexScopedSettings; } /** @@ -94,8 +94,7 @@ public class MetaDataIndexUpgradeService extends AbstractComponent { */ private void checkSupportedVersion(IndexMetaData indexMetaData) { if (indexMetaData.getState() == IndexMetaData.State.OPEN && isSupportedVersion(indexMetaData) == false) { - throw new IllegalStateException("The index [" + indexMetaData.getIndex() + "] was created before v2.0.0.beta1 and wasn't " + - "upgraded." + throw new IllegalStateException("The index [" + indexMetaData.getIndex() + "] was created before v2.0.0.beta1 and wasn't upgraded." 
+ " This index should be open using a version before " + Version.CURRENT.minimumCompatibilityVersion() + " and upgraded using the upgrade API."); } @@ -128,12 +127,10 @@ public class MetaDataIndexUpgradeService extends AbstractComponent { SimilarityService similarityService = new SimilarityService(indexSettings, Collections.emptyMap()); try (AnalysisService analysisService = new FakeAnalysisService(indexSettings)) { - try (MapperService mapperService = new MapperService(indexSettings, analysisService, similarityService, mapperRegistry, - () -> null)) { + try (MapperService mapperService = new MapperService(indexSettings, analysisService, similarityService, mapperRegistry, () -> null)) { for (ObjectCursor cursor : indexMetaData.getMappings().values()) { MappingMetaData mappingMetaData = cursor.value; - mapperService.merge(mappingMetaData.type(), mappingMetaData.source(), MapperService.MergeReason.MAPPING_RECOVERY, - false); + mapperService.merge(mappingMetaData.type(), mappingMetaData.source(), MapperService.MergeReason.MAPPING_RECOVERY, false); } } } @@ -147,8 +144,7 @@ public class MetaDataIndexUpgradeService extends AbstractComponent { * Marks index as upgraded so we don't have to test it again */ private IndexMetaData markAsUpgraded(IndexMetaData indexMetaData) { - Settings settings = Settings.builder().put(indexMetaData.getSettings()).put(IndexMetaData.SETTING_VERSION_UPGRADED, Version - .CURRENT).build(); + Settings settings = Settings.builder().put(indexMetaData.getSettings()).put(IndexMetaData.SETTING_VERSION_UPGRADED, Version.CURRENT).build(); return IndexMetaData.builder(indexMetaData).settings(settings).build(); } @@ -180,45 +176,13 @@ public class MetaDataIndexUpgradeService extends AbstractComponent { } } - private static final String ARCHIVED_SETTINGS_PREFIX = "archived."; - IndexMetaData archiveBrokenIndexSettings(IndexMetaData indexMetaData) { - Settings settings = indexMetaData.getSettings(); - Settings.Builder builder = Settings.builder(); - boolean changed = false; - for (Map.Entry entry : settings.getAsMap().entrySet()) { - try { - Setting setting = indexScopedSettigns.get(entry.getKey()); - if (setting != null) { - setting.get(settings); - builder.put(entry.getKey(), entry.getValue()); - } else { - if (indexScopedSettigns.isPrivateSetting(entry.getKey()) || entry.getKey().startsWith(ARCHIVED_SETTINGS_PREFIX)) { - builder.put(entry.getKey(), entry.getValue()); - } else { - changed = true; - logger.warn("[{}] found unknown index setting: {} value: {} - archiving", indexMetaData.getIndex(), entry.getKey - (), entry.getValue()); - // we put them back in here such that tools can check from the outside if there are any indices with broken - // settings. The setting can remain there - // but we want users to be aware that some of their setting are broken and they can research why and what they - // need to do to replace them. - builder.put(ARCHIVED_SETTINGS_PREFIX + entry.getKey(), entry.getValue()); - } - } - } catch (IllegalArgumentException ex) { - changed = true; - logger.warn("[{}] found invalid index setting: {} value: {} - archiving", ex, indexMetaData.getIndex(), entry.getKey(), - entry.getValue()); - // we put them back in here such that tools can check from the outside if there are any indices with broken settings. The - // setting can remain there - // but we want users to be aware that some of their setting sare broken and they can research why and what they need to - // do to replace them. 
- builder.put(ARCHIVED_SETTINGS_PREFIX + entry.getKey(), entry.getValue()); - } + final Settings settings = indexMetaData.getSettings(); + final Settings upgrade = indexScopedSettings.archiveUnknownOrBrokenSettings(settings); + if (upgrade != settings) { + return IndexMetaData.builder(indexMetaData).settings(upgrade).build(); + } else { + return indexMetaData; } - - return changed ? IndexMetaData.builder(indexMetaData).settings(builder.build()).build() : indexMetaData; } - -} +} \ No newline at end of file
diff --git a/core/src/main/java/org/elasticsearch/cluster/service/ClusterService.java b/core/src/main/java/org/elasticsearch/cluster/service/ClusterService.java index 29c88a8baf5..d9a2ab785d7 100644 --- a/core/src/main/java/org/elasticsearch/cluster/service/ClusterService.java +++ b/core/src/main/java/org/elasticsearch/cluster/service/ClusterService.java @@ -1002,4 +1002,8 @@ public class ClusterService extends AbstractLifecycleComponent { } } } + + public ClusterSettings getClusterSettings() { + return clusterSettings; + } }
diff --git a/core/src/main/java/org/elasticsearch/common/settings/AbstractScopedSettings.java b/core/src/main/java/org/elasticsearch/common/settings/AbstractScopedSettings.java index 410adc82da1..358706c9d3f 100644 --- a/core/src/main/java/org/elasticsearch/common/settings/AbstractScopedSettings.java +++ b/core/src/main/java/org/elasticsearch/common/settings/AbstractScopedSettings.java @@ -48,6 +48,7 @@ import java.util.stream.Collectors; * This service offers transactional application of updates settings. */ public abstract class AbstractScopedSettings extends AbstractComponent { + public static final String ARCHIVED_SETTINGS_PREFIX = "archived."; private Settings lastSettingsApplied = Settings.EMPTY; private final List> settingUpdaters = new CopyOnWriteArrayList<>(); private final Map> complexMatchers; @@ -478,4 +479,53 @@ public abstract class AbstractScopedSettings extends AbstractComponent { } return null; } + + /** + * Archives broken or unknown settings. Any setting that is not recognized or fails + * validation will be archived. This means the setting is prefixed with {@value ARCHIVED_SETTINGS_PREFIX} + * and remains in the settings object. This can be used to detect broken settings via APIs. + */ + public Settings archiveUnknownOrBrokenSettings(Settings settings) { + Settings.Builder builder = Settings.builder(); + boolean changed = false; + for (Map.Entry entry : settings.getAsMap().entrySet()) { + try { + Setting setting = get(entry.getKey()); + if (setting != null) { + setting.get(settings); + builder.put(entry.getKey(), entry.getValue()); + } else { + if (entry.getKey().startsWith(ARCHIVED_SETTINGS_PREFIX) || isPrivateSetting(entry.getKey())) { + builder.put(entry.getKey(), entry.getValue()); + } else { + changed = true; + logger.warn("found unknown setting: {} value: {} - archiving", entry.getKey(), entry.getValue()); + // we put them back in here such that tools can check from the outside if there are any indices with broken settings. The setting can remain there + // but we want users to be aware that some of their settings are broken and they can research why and what they need to do to replace them. + builder.put(ARCHIVED_SETTINGS_PREFIX + entry.getKey(), entry.getValue()); + } + } + } catch (IllegalArgumentException ex) { + changed = true; + logger.warn("found invalid setting: {} value: {} - archiving", ex, entry.getKey(), entry.getValue()); + // we put them back in here such that tools can check from the outside if there are any indices with broken settings. The setting can remain there + // but we want users to be aware that some of their settings are broken and they can research why and what they need to do to replace them. + builder.put(ARCHIVED_SETTINGS_PREFIX + entry.getKey(), entry.getValue()); + } + } + if (changed) { + return builder.build(); + } else { + return settings; + } + } + + /** + * Returns true iff the setting is a private setting, i.e., it should be treated as valid even though it has no internal + * representation. Otherwise false + */ + // TODO this should be replaced by Setting.Property.HIDDEN or something like this. + protected boolean isPrivateSetting(String key) { + return false; + } }
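For orientation, the archiving contract introduced above is easiest to see end to end. The following is a minimal sketch, not part of the change itself: it assumes the IndexScopedSettings constructor and BUILT_IN_INDEX_SETTINGS registry as they exist on this branch, and the setting names are invented for illustration.

import org.elasticsearch.common.settings.IndexScopedSettings;
import org.elasticsearch.common.settings.Settings;

// recognized, valid keys pass through untouched; unknown or invalid keys
// are re-added under the "archived." prefix instead of being dropped
IndexScopedSettings registry =
        new IndexScopedSettings(Settings.EMPTY, IndexScopedSettings.BUILT_IN_INDEX_SETTINGS);
Settings broken = Settings.builder()
        .put("index.number_of_replicas", "1")       // known setting: kept as-is
        .put("index.made.up.setting", "whatever")   // unknown setting (invented name): archived
        .build();
Settings upgraded = registry.archiveUnknownOrBrokenSettings(broken);
// upgraded now maps "archived.index.made.up.setting" -> "whatever", so external
// tools can list the archived.* keys to surface broken settings to users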
diff --git a/core/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java b/core/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java index 322ac4de799..3fa9cdcddb0 100644 --- a/core/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java +++ b/core/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java @@ -171,7 +171,8 @@ public final class IndexScopedSettings extends AbstractScopedSettings { super.validateSettingKey(setting); } - public boolean isPrivateSetting(String key) { + @Override + protected final boolean isPrivateSetting(String key) { switch (key) { case IndexMetaData.SETTING_CREATION_DATE: case IndexMetaData.SETTING_INDEX_UUID:
diff --git a/core/src/main/java/org/elasticsearch/common/util/IndexFolderUpgrader.java b/core/src/main/java/org/elasticsearch/common/util/IndexFolderUpgrader.java index 3640d3e4bec..221dc234511 100644 --- a/core/src/main/java/org/elasticsearch/common/util/IndexFolderUpgrader.java +++ b/core/src/main/java/org/elasticsearch/common/util/IndexFolderUpgrader.java @@ -24,12 +24,7 @@ import org.elasticsearch.cluster.metadata.IndexMetaData; import org.elasticsearch.common.logging.ESLogger; import org.elasticsearch.common.logging.Loggers; import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.common.xcontent.XContentBuilder; -import org.elasticsearch.common.xcontent.XContentParser; -import org.elasticsearch.common.xcontent.XContentType; import org.elasticsearch.env.NodeEnvironment; -import org.elasticsearch.gateway.MetaDataStateFormat; -import org.elasticsearch.gateway.MetaStateService; import org.elasticsearch.index.Index; import org.elasticsearch.index.IndexSettings;
diff --git a/core/src/main/java/org/elasticsearch/common/xcontent/XContentBuilder.java b/core/src/main/java/org/elasticsearch/common/xcontent/XContentBuilder.java index 8ca53af186c..2229d45840b 100644 --- a/core/src/main/java/org/elasticsearch/common/xcontent/XContentBuilder.java +++ b/core/src/main/java/org/elasticsearch/common/xcontent/XContentBuilder.java @@ -961,6 +961,10 @@ public final class XContentBuilder implements BytesStream, Releasable { return this; } + public XContentBuilder timeValueField(String rawFieldName, String readableFieldName, TimeValue timeValue) throws IOException { + return timeValueField(rawFieldName, readableFieldName, timeValue.millis(), TimeUnit.MILLISECONDS); + } + + public XContentBuilder timeValueField(String rawFieldName, String readableFieldName, long rawTime, TimeUnit timeUnit) throws IOException { if (humanReadable) {
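As an aside, the new convenience overload above only delegates to the existing millisecond variant. A short sketch of the intended usage follows; the field names are invented, and the exact human-readable rendering assumes TimeValue's default formatting:

import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;

XContentBuilder builder = XContentFactory.jsonBuilder().humanReadable(true);
builder.startObject();
// with humanReadable enabled this writes the readable field and the raw
// millisecond count; without it only the raw field is written
builder.timeValueField("took_in_millis", "took", new TimeValue(1500));
builder.endObject();
// -> {"took":"1.5s","took_in_millis":1500}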
diff --git a/core/src/main/java/org/elasticsearch/gateway/Gateway.java b/core/src/main/java/org/elasticsearch/gateway/Gateway.java index c879e6ab710..b2cb2d11079 100644 --- a/core/src/main/java/org/elasticsearch/gateway/Gateway.java +++ b/core/src/main/java/org/elasticsearch/gateway/Gateway.java @@ -19,6 +19,8 @@ package org.elasticsearch.gateway; +import com.carrotsearch.hppc.ObjectFloatHashMap; +import com.carrotsearch.hppc.cursors.ObjectCursor; import org.apache.lucene.util.IOUtils; import org.elasticsearch.action.FailedNodeException; import org.elasticsearch.cluster.ClusterChangedEvent; @@ -28,9 +30,11 @@ import org.elasticsearch.cluster.metadata.IndexMetaData; import org.elasticsearch.cluster.metadata.MetaData; import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.common.component.AbstractComponent; +import org.elasticsearch.common.settings.ClusterSettings; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.discovery.Discovery; import org.elasticsearch.env.NodeEnvironment; +import org.elasticsearch.index.Index; import org.elasticsearch.index.NodeServicesProvider; import org.elasticsearch.indices.IndicesService; @@ -84,6 +88,7 @@ public class Gateway extends AbstractComponent implements ClusterStateListener { } } + ObjectFloatHashMap indices = new ObjectFloatHashMap<>(); MetaData electedGlobalState = null; int found = 0; for (TransportNodesListGatewayMetaState.NodeGatewayMetaState nodeState : nodesState) { @@ -96,34 +101,68 @@ } else if (nodeState.metaData().version() > electedGlobalState.version()) { electedGlobalState = nodeState.metaData(); } + for (ObjectCursor cursor : nodeState.metaData().indices().values()) { + indices.addTo(cursor.value.getIndex(), 1); + } } if (found < requiredAllocation) { listener.onFailure("found [" + found + "] metadata states, required [" + requiredAllocation + "]"); return; } - // verify index metadata - MetaData.Builder metaDataBuilder = MetaData.builder(electedGlobalState); - for (IndexMetaData indexMetaData : electedGlobalState) { - try { - if (indexMetaData.getState() == IndexMetaData.State.OPEN) { - // verify that we can actually create this index - if not we recover it as closed with lots of warn logs - indicesService.verifyIndexMetadata(nodeServicesProvider, indexMetaData); + // update the global state and clean the indices; we elect them in the next phase + MetaData.Builder metaDataBuilder = MetaData.builder(electedGlobalState).removeAllIndices(); + + assert !indices.containsKey(null); + final Object[] keys = indices.keys; + for (int i = 0; i < keys.length; i++) { + if (keys[i] != null) { + Index index = (Index) keys[i]; + IndexMetaData electedIndexMetaData = null; + int indexMetaDataCount = 0; + for (TransportNodesListGatewayMetaState.NodeGatewayMetaState nodeState : nodesState) { + if (nodeState.metaData() == null) { + continue; + } + IndexMetaData indexMetaData = nodeState.metaData().index(index); + if (indexMetaData == null) { + continue; + } + if (electedIndexMetaData == null) { + electedIndexMetaData = indexMetaData; + } else if (indexMetaData.getVersion() > electedIndexMetaData.getVersion()) { + electedIndexMetaData = indexMetaData; + } + indexMetaDataCount++; + } + if (electedIndexMetaData != null) { + if (indexMetaDataCount < requiredAllocation) { + logger.debug("[{}] found [{}], required [{}], not adding", index, indexMetaDataCount,
requiredAllocation); + } // TODO if this logging statement is correct then we are missing an else here + try { + if (electedIndexMetaData.getState() == IndexMetaData.State.OPEN) { + // verify that we can actually create this index - if not we recover it as closed with lots of warn logs + indicesService.verifyIndexMetadata(nodeServicesProvider, electedIndexMetaData); + } + } catch (Exception e) { + logger.warn("recovering index {} failed - recovering as closed", e, electedIndexMetaData.getIndex()); + electedIndexMetaData = IndexMetaData.builder(electedIndexMetaData).state(IndexMetaData.State.CLOSE).build(); + } + + metaDataBuilder.put(electedIndexMetaData, false); } - } catch (Exception e) { - logger.warn("recovering index {} failed - recovering as closed", e, indexMetaData.getIndex()); - indexMetaData = IndexMetaData.builder(indexMetaData).state(IndexMetaData.State.CLOSE).build(); - metaDataBuilder.put(indexMetaData, true); } } + final ClusterSettings clusterSettings = clusterService.getClusterSettings(); + metaDataBuilder.persistentSettings(clusterSettings.archiveUnknownOrBrokenSettings(metaDataBuilder.persistentSettings())); + metaDataBuilder.transientSettings(clusterSettings.archiveUnknownOrBrokenSettings(metaDataBuilder.transientSettings())); ClusterState.Builder builder = ClusterState.builder(clusterService.state().getClusterName()); builder.metaData(metaDataBuilder); listener.onSuccess(builder.build()); } - public void reset() throws Exception { try { Path[] dataPaths = nodeEnv.nodeDataPaths(); - logger.trace("removing node data paths: [{}]", (Object) dataPaths); + logger.trace("removing node data paths: [{}]", (Object)dataPaths); IOUtils.rm(dataPaths); } catch (Exception ex) { logger.debug("failed to delete shard locations", ex); diff --git a/core/src/main/java/org/elasticsearch/index/analysis/Analysis.java b/core/src/main/java/org/elasticsearch/index/analysis/Analysis.java index b7481e78496..1054721535e 100644 --- a/core/src/main/java/org/elasticsearch/index/analysis/Analysis.java +++ b/core/src/main/java/org/elasticsearch/index/analysis/Analysis.java @@ -67,8 +67,10 @@ import org.elasticsearch.env.Environment; import java.io.BufferedReader; import java.io.IOException; import java.io.Reader; +import java.nio.charset.CharacterCodingException; import java.nio.charset.StandardCharsets; import java.nio.file.Path; +import java.nio.file.Paths; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; @@ -163,7 +165,8 @@ public class Analysis { NAMED_STOP_WORDS = unmodifiableMap(namedStopWords); } - public static CharArraySet parseWords(Environment env, Settings settings, String name, CharArraySet defaultWords, Map> namedWords, boolean ignoreCase) { + public static CharArraySet parseWords(Environment env, Settings settings, String name, CharArraySet defaultWords, + Map> namedWords, boolean ignoreCase) { String value = settings.get(name); if (value != null) { if ("_none_".equals(value)) { @@ -237,12 +240,17 @@ public class Analysis { } } - final Path wordListFile = env.configFile().resolve(wordListPath); + final Path path = env.configFile().resolve(wordListPath); - try (BufferedReader reader = FileSystemUtils.newBufferedReader(wordListFile.toUri().toURL(), StandardCharsets.UTF_8)) { + try (BufferedReader reader = FileSystemUtils.newBufferedReader(path.toUri().toURL(), StandardCharsets.UTF_8)) { return loadWordList(reader, "#"); + } catch (CharacterCodingException ex) { + String message = String.format(Locale.ROOT, + "Unsupported character encoding detected while 
reading %s_path: %s - files must be UTF-8 encoded", + settingPrefix, path.toString()); + throw new IllegalArgumentException(message, ex); } catch (IOException ioe) { - String message = String.format(Locale.ROOT, "IOException while reading %s_path: %s", settingPrefix); + String message = String.format(Locale.ROOT, "IOException while reading %s_path: %s", settingPrefix, path.toString()); throw new IllegalArgumentException(message, ioe); } } @@ -256,7 +264,7 @@ public class Analysis { } else { br = new BufferedReader(reader); } - String word = null; + String word; while ((word = br.readLine()) != null) { if (!Strings.hasText(word)) { continue; @@ -283,13 +291,16 @@ public class Analysis { if (filePath == null) { return null; } - final Path path = env.configFile().resolve(filePath); - try { return FileSystemUtils.newBufferedReader(path.toUri().toURL(), StandardCharsets.UTF_8); + } catch (CharacterCodingException ex) { + String message = String.format(Locale.ROOT, + "Unsupported character encoding detected while reading %s_path: %s files must be UTF-8 encoded", + settingPrefix, path.toString()); + throw new IllegalArgumentException(message, ex); } catch (IOException ioe) { - String message = String.format(Locale.ROOT, "IOException while reading %s_path: %s", settingPrefix); + String message = String.format(Locale.ROOT, "IOException while reading %s_path: %s", settingPrefix, path.toString()); throw new IllegalArgumentException(message, ioe); } } diff --git a/core/src/main/java/org/elasticsearch/index/mapper/internal/FieldNamesFieldMapper.java b/core/src/main/java/org/elasticsearch/index/mapper/internal/FieldNamesFieldMapper.java index 03ebcb9fe95..48d2bc75441 100644 --- a/core/src/main/java/org/elasticsearch/index/mapper/internal/FieldNamesFieldMapper.java +++ b/core/src/main/java/org/elasticsearch/index/mapper/internal/FieldNamesFieldMapper.java @@ -22,6 +22,7 @@ package org.elasticsearch.index.mapper.internal; import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.IndexableField; +import org.apache.lucene.search.Query; import org.elasticsearch.common.Strings; import org.elasticsearch.common.lucene.Lucene; import org.elasticsearch.common.settings.Settings; @@ -32,6 +33,7 @@ import org.elasticsearch.index.mapper.Mapper; import org.elasticsearch.index.mapper.MapperParsingException; import org.elasticsearch.index.mapper.MetadataFieldMapper; import org.elasticsearch.index.mapper.ParseContext; +import org.elasticsearch.index.query.QueryShardContext; import java.io.IOException; import java.util.ArrayList; @@ -192,6 +194,14 @@ public class FieldNamesFieldMapper extends MetadataFieldMapper { public boolean useTermQueryWithQueryString() { return true; } + + @Override + public Query termQuery(Object value, QueryShardContext context) { + if (isEnabled() == false) { + throw new IllegalStateException("Cannot run [exists] queries if the [_field_names] field is disabled"); + } + return super.termQuery(value, context); + } } private FieldNamesFieldMapper(Settings indexSettings, MappedFieldType existing) { diff --git a/core/src/main/java/org/elasticsearch/index/mapper/object/DynamicTemplate.java b/core/src/main/java/org/elasticsearch/index/mapper/object/DynamicTemplate.java index 58602f06dfa..44cdac17be1 100644 --- a/core/src/main/java/org/elasticsearch/index/mapper/object/DynamicTemplate.java +++ b/core/src/main/java/org/elasticsearch/index/mapper/object/DynamicTemplate.java @@ -19,11 +19,15 @@ package org.elasticsearch.index.mapper.object; +import 
org.elasticsearch.Version; import org.elasticsearch.common.Strings; import org.elasticsearch.common.regex.Regex; +import org.elasticsearch.common.xcontent.ToXContent; +import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.index.mapper.ContentPath; import org.elasticsearch.index.mapper.MapperParsingException; +import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; import java.util.List; @@ -33,30 +37,41 @@ import java.util.TreeMap; /** * */ -public class DynamicTemplate { +public class DynamicTemplate implements ToXContent { public static enum MatchType { - SIMPLE, - REGEX; + SIMPLE { + @Override + public String toString() { + return "simple"; + } + }, + REGEX { + @Override + public String toString() { + return "regex"; + } + }; public static MatchType fromString(String value) { - if ("simple".equals(value)) { - return SIMPLE; - } else if ("regex".equals(value)) { - return REGEX; + for (MatchType v : values()) { + if (v.toString().equals(value)) { + return v; + } } throw new IllegalArgumentException("No matching pattern matched on [" + value + "]"); } } - public static DynamicTemplate parse(String name, Map conf) throws MapperParsingException { + public static DynamicTemplate parse(String name, Map conf, + Version indexVersionCreated) throws MapperParsingException { String match = null; String pathMatch = null; String unmatch = null; String pathUnmatch = null; Map mapping = null; String matchMappingType = null; - String matchPattern = "simple"; + String matchPattern = MatchType.SIMPLE.toString(); for (Map.Entry entry : conf.entrySet()) { String propName = Strings.toUnderscoreCase(entry.getKey()); @@ -74,22 +89,18 @@ public class DynamicTemplate { matchPattern = entry.getValue().toString(); } else if ("mapping".equals(propName)) { mapping = (Map) entry.getValue(); + } else if (indexVersionCreated.onOrAfter(Version.V_5_0_0)) { + // unknown parameters were ignored before but still carried through serialization + // so we need to ignore them at parsing time for old indices + throw new IllegalArgumentException("Illegal dynamic template parameter: [" + propName + "]"); } } - if (match == null && pathMatch == null && matchMappingType == null) { - throw new MapperParsingException("template must have match, path_match or match_mapping_type set"); - } - if (mapping == null) { - throw new MapperParsingException("template must have mapping set"); - } - return new DynamicTemplate(name, conf, pathMatch, pathUnmatch, match, unmatch, matchMappingType, MatchType.fromString(matchPattern), mapping); + return new DynamicTemplate(name, pathMatch, pathUnmatch, match, unmatch, matchMappingType, MatchType.fromString(matchPattern), mapping); } private final String name; - private final Map conf; - private final String pathMatch; private final String pathUnmatch; @@ -104,9 +115,14 @@ public class DynamicTemplate { private final Map mapping; - public DynamicTemplate(String name, Map conf, String pathMatch, String pathUnmatch, String match, String unmatch, String matchMappingType, MatchType matchType, Map mapping) { + public DynamicTemplate(String name, String pathMatch, String pathUnmatch, String match, String unmatch, String matchMappingType, MatchType matchType, Map mapping) { + if (match == null && pathMatch == null && matchMappingType == null) { + throw new MapperParsingException("template must have match, path_match or match_mapping_type set"); + } + if (mapping == null) { + throw new MapperParsingException("template must have mapping set"); + } this.name = 
name; - this.conf = new TreeMap<>(conf); this.pathMatch = pathMatch; this.pathUnmatch = pathUnmatch; this.match = match; @@ -120,10 +136,6 @@ public class DynamicTemplate { return this.name; } - public Map conf() { - return this.conf; - } - public boolean match(ContentPath path, String name, String dynamicType) { if (pathMatch != null && !patternMatch(pathMatch, path.pathAsText(name))) { return false; @@ -148,10 +160,6 @@ public class DynamicTemplate { return true; } - public boolean hasType() { - return mapping.containsKey("type"); - } - public String mappingType(String dynamicType) { return mapping.containsKey("type") ? mapping.get("type").toString().replace("{dynamic_type}", dynamicType).replace("{dynamicType}", dynamicType) : dynamicType; } @@ -200,40 +208,29 @@ public class DynamicTemplate { } @Override - public boolean equals(Object o) { - if (this == o) { - return true; + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + if (match != null) { + builder.field("match", match); } - if (o == null || getClass() != o.getClass()) { - return false; + if (pathMatch != null) { + builder.field("path_match", pathMatch); } - - DynamicTemplate that = (DynamicTemplate) o; - - // check if same matching, if so, replace the mapping - if (match != null ? !match.equals(that.match) : that.match != null) { - return false; + if (unmatch != null) { + builder.field("unmatch", unmatch); } - if (matchMappingType != null ? !matchMappingType.equals(that.matchMappingType) : that.matchMappingType != null) { - return false; + if (pathUnmatch != null) { + builder.field("path_unmatch", pathUnmatch); } - if (matchType != that.matchType) { - return false; + if (matchMappingType != null) { + builder.field("match_mapping_type", matchMappingType); } - if (unmatch != null ? !unmatch.equals(that.unmatch) : that.unmatch != null) { - return false; + if (matchType != MatchType.SIMPLE) { + builder.field("match_pattern", matchType); } - - return true; - } - - @Override - public int hashCode() { - // check if same matching, if so, replace the mapping - int result = match != null ? match.hashCode() : 0; - result = 31 * result + (unmatch != null ? unmatch.hashCode() : 0); - result = 31 * result + (matchType != null ? matchType.hashCode() : 0); - result = 31 * result + (matchMappingType != null ? 
matchMappingType.hashCode() : 0); - return result; + // use a sorted map for consistent serialization + builder.field("mapping", new TreeMap<>(mapping)); + builder.endObject(); + return builder; } } diff --git a/core/src/main/java/org/elasticsearch/index/mapper/object/RootObjectMapper.java b/core/src/main/java/org/elasticsearch/index/mapper/object/RootObjectMapper.java index 00de61acdb6..7e5dc3d28f5 100644 --- a/core/src/main/java/org/elasticsearch/index/mapper/object/RootObjectMapper.java +++ b/core/src/main/java/org/elasticsearch/index/mapper/object/RootObjectMapper.java @@ -19,6 +19,7 @@ package org.elasticsearch.index.mapper.object; +import org.elasticsearch.Version; import org.elasticsearch.common.Nullable; import org.elasticsearch.common.Strings; import org.elasticsearch.common.joda.FormatDateTimeFormatter; @@ -140,14 +141,15 @@ public class RootObjectMapper extends ObjectMapper { String fieldName = Strings.toUnderscoreCase(entry.getKey()); Object fieldNode = entry.getValue(); if (parseObjectOrDocumentTypeProperties(fieldName, fieldNode, parserContext, builder) - || processField(builder, fieldName, fieldNode)) { + || processField(builder, fieldName, fieldNode, parserContext.indexVersionCreated())) { iterator.remove(); } } return builder; } - protected boolean processField(ObjectMapper.Builder builder, String fieldName, Object fieldNode) { + protected boolean processField(ObjectMapper.Builder builder, String fieldName, Object fieldNode, + Version indexVersionCreated) { if (fieldName.equals("date_formats") || fieldName.equals("dynamic_date_formats")) { List dateTimeFormatters = new ArrayList<>(); if (fieldNode instanceof List) { @@ -185,7 +187,10 @@ public class RootObjectMapper extends ObjectMapper { throw new MapperParsingException("A dynamic template must be defined with a name"); } Map.Entry entry = tmpl.entrySet().iterator().next(); - ((Builder) builder).add(DynamicTemplate.parse(entry.getKey(), (Map) entry.getValue())); + String templateName = entry.getKey(); + Map templateParams = (Map) entry.getValue(); + DynamicTemplate template = DynamicTemplate.parse(templateName, templateParams, indexVersionCreated); + ((Builder) builder).add(template); } return true; } else if (fieldName.equals("date_detection")) { @@ -329,8 +334,7 @@ public class RootObjectMapper extends ObjectMapper { builder.startArray("dynamic_templates"); for (DynamicTemplate dynamicTemplate : dynamicTemplates) { builder.startObject(); - builder.field(dynamicTemplate.name()); - builder.map(dynamicTemplate.conf()); + builder.field(dynamicTemplate.name(), dynamicTemplate); builder.endObject(); } builder.endArray(); diff --git a/core/src/main/java/org/elasticsearch/index/query/ExistsQueryBuilder.java b/core/src/main/java/org/elasticsearch/index/query/ExistsQueryBuilder.java index ba25d99ed36..79e38fe7be9 100644 --- a/core/src/main/java/org/elasticsearch/index/query/ExistsQueryBuilder.java +++ b/core/src/main/java/org/elasticsearch/index/query/ExistsQueryBuilder.java @@ -23,18 +23,16 @@ import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.ConstantScoreQuery; import org.apache.lucene.search.Query; -import org.apache.lucene.search.TermRangeQuery; import org.elasticsearch.common.Strings; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.lucene.search.Queries; import org.elasticsearch.common.xcontent.XContentBuilder; -import 
org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.mapper.internal.FieldNamesFieldMapper; -import org.elasticsearch.index.mapper.object.ObjectMapper; import java.io.IOException; import java.util.Collection; +import java.util.Collections; import java.util.Objects; /** @@ -82,38 +80,18 @@ public class ExistsQueryBuilder extends AbstractQueryBuilder return Queries.newMatchNoDocsQuery(); } - ObjectMapper objectMapper = context.getObjectMapper(fieldPattern); - if (objectMapper != null) { - // automatic make the object mapper pattern - fieldPattern = fieldPattern + ".*"; - } - - Collection fields = context.simpleMatchToIndexNames(fieldPattern); - if (fields.isEmpty()) { - // no fields exists, so we should not match anything - return Queries.newMatchNoDocsQuery(); + final Collection fields; + if (context.getObjectMapper(fieldPattern) != null) { + // the _field_names field also indexes objects, so we don't have to + // do any more work to support exists queries on whole objects + fields = Collections.singleton(fieldPattern); + } else { + fields = context.simpleMatchToIndexNames(fieldPattern); } BooleanQuery.Builder boolFilterBuilder = new BooleanQuery.Builder(); for (String field : fields) { - MappedFieldType fieldType = context.fieldMapper(field); - Query filter = null; - if (fieldNamesFieldType.isEnabled()) { - final String f; - if (fieldType != null) { - f = fieldType.name(); - } else { - f = field; - } - filter = fieldNamesFieldType.termQuery(f, context); - } - // if _field_names are not indexed, we need to go the slow way - if (filter == null && fieldType != null) { - filter = fieldType.rangeQuery(null, null, true, true); - } - if (filter == null) { - filter = new TermRangeQuery(field, null, null, true, true); - } + Query filter = fieldNamesFieldType.termQuery(field, context); boolFilterBuilder.add(filter, BooleanClause.Occur.SHOULD); } return new ConstantScoreQuery(boolFilterBuilder.build()); diff --git a/core/src/main/java/org/elasticsearch/index/query/QueryShardContext.java b/core/src/main/java/org/elasticsearch/index/query/QueryShardContext.java index a21b53cdf51..63eff82ddb0 100644 --- a/core/src/main/java/org/elasticsearch/index/query/QueryShardContext.java +++ b/core/src/main/java/org/elasticsearch/index/query/QueryShardContext.java @@ -105,6 +105,7 @@ public class QueryShardContext extends QueryRewriteContext { this.allowUnmappedFields = indexSettings.isDefaultAllowUnmappedFields(); this.indicesQueriesRegistry = indicesQueriesRegistry; this.percolatorQueryCache = percolatorQueryCache; + this.nestedScope = new NestedScope(); } public QueryShardContext(QueryShardContext source) { @@ -113,6 +114,7 @@ public class QueryShardContext extends QueryRewriteContext { } + @Override public QueryShardContext clone() { return new QueryShardContext(indexSettings, bitsetFilterCache, indexFieldDataService, mapperService, similarityService, scriptService, indicesQueriesRegistry, percolatorQueryCache); } diff --git a/core/src/main/java/org/elasticsearch/index/query/RangeQueryBuilder.java b/core/src/main/java/org/elasticsearch/index/query/RangeQueryBuilder.java index c3953a51170..b1132c42ea9 100644 --- a/core/src/main/java/org/elasticsearch/index/query/RangeQueryBuilder.java +++ b/core/src/main/java/org/elasticsearch/index/query/RangeQueryBuilder.java @@ -22,10 +22,6 @@ package org.elasticsearch.index.query; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermRangeQuery; import org.apache.lucene.util.BytesRef; -import 
org.elasticsearch.action.fieldstats.FieldStats; -import org.elasticsearch.action.fieldstats.IndexConstraint; -import org.elasticsearch.action.fieldstats.IndexConstraint.Comparison; -import org.elasticsearch.action.fieldstats.IndexConstraint.Property; import org.elasticsearch.common.Strings; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; @@ -259,8 +255,8 @@ public class RangeQueryBuilder extends AbstractQueryBuilder i } @Override - protected QueryBuilder doRewrite(QueryRewriteContext queryShardContext) throws IOException { - FieldStatsProvider fieldStatsProvider = queryShardContext.getFieldStatsProvider(); + protected QueryBuilder doRewrite(QueryRewriteContext queryRewriteContext) throws IOException { + FieldStatsProvider fieldStatsProvider = queryRewriteContext.getFieldStatsProvider(); // If the fieldStatsProvider is null we are not on the shard and cannot // rewrite so just return without rewriting if (fieldStatsProvider != null) { @@ -271,17 +267,10 @@ public class RangeQueryBuilder extends AbstractQueryBuilder i case DISJOINT: return new MatchNoneQueryBuilder(); case WITHIN: - FieldStats fieldStats = fieldStatsProvider.get(fieldName); - if (!(fieldStats.getMinValue().equals(from) && fieldStats.getMaxValue().equals(to) && includeUpper && includeLower)) { - // Rebuild the range query with the bounds for this shard. - // The includeLower/Upper values are preserved only if the - // bound has not been changed by the rewrite + if (from != null || to != null) { RangeQueryBuilder newRangeQuery = new RangeQueryBuilder(fieldName); - String dateFormatString = format == null ? null : format.format(); - newRangeQuery.from(fieldStats.getMinValue(), includeLower || fieldStats.match( - new IndexConstraint(fieldName, Property.MIN, Comparison.GT, fieldStats.stringValueOf(from, dateFormatString)))); - newRangeQuery.to(fieldStats.getMaxValue(), includeUpper || fieldStats.match( - new IndexConstraint(fieldName, Property.MAX, Comparison.LT, fieldStats.stringValueOf(to, dateFormatString)))); + newRangeQuery.from(null); + newRangeQuery.to(null); newRangeQuery.format = format; newRangeQuery.timeZone = timeZone; return newRangeQuery; diff --git a/core/src/main/java/org/elasticsearch/index/query/support/NestedInnerQueryParseSupport.java b/core/src/main/java/org/elasticsearch/index/query/support/NestedInnerQueryParseSupport.java index 86983026b19..5c65a57d532 100644 --- a/core/src/main/java/org/elasticsearch/index/query/support/NestedInnerQueryParseSupport.java +++ b/core/src/main/java/org/elasticsearch/index/query/support/NestedInnerQueryParseSupport.java @@ -20,9 +20,7 @@ package org.elasticsearch.index.query.support; import org.apache.lucene.search.Query; -import org.apache.lucene.search.join.BitSetProducer; import org.elasticsearch.common.bytes.BytesReference; -import org.elasticsearch.common.lucene.search.Queries; import org.elasticsearch.common.xcontent.XContentFactory; import org.elasticsearch.common.xcontent.XContentHelper; import org.elasticsearch.common.xcontent.XContentParser; @@ -45,46 +43,18 @@ public class NestedInnerQueryParseSupport { protected final QueryParseContext parseContext; private BytesReference source; - private Query innerQuery; private Query innerFilter; protected String path; private boolean filterParsed = false; - private boolean queryParsed = false; - protected boolean queryFound = false; protected boolean filterFound = false; - protected BitSetProducer parentFilter; - protected Query childFilter; - protected 
ObjectMapper nestedObjectMapper; - private ObjectMapper parentObjectMapper; public NestedInnerQueryParseSupport(XContentParser parser, QueryShardContext context) { shardContext = context; parseContext = shardContext.parseContext(); shardContext.reset(parser); - - } - - public NestedInnerQueryParseSupport(QueryShardContext context) { - this.parseContext = context.parseContext(); - this.shardContext = context; - } - - public void query() throws IOException { - if (path != null) { - setPathLevel(); - try { - innerQuery = parseContext.parseInnerQueryBuilder().toQuery(this.shardContext); - } finally { - resetPathLevel(); - } - queryParsed = true; - } else { - source = XContentFactory.smileBuilder().copyCurrentStructure(parseContext.parser()).bytes(); - } - queryFound = true; } public void filter() throws IOException { @@ -103,35 +73,6 @@ public class NestedInnerQueryParseSupport { filterFound = true; } - public Query getInnerQuery() throws IOException { - if (queryParsed) { - return innerQuery; - } else { - if (path == null) { - throw new QueryShardException(shardContext, "[nested] requires 'path' field"); - } - if (!queryFound) { - throw new QueryShardException(shardContext, "[nested] requires either 'query' or 'filter' field"); - } - - XContentParser old = parseContext.parser(); - try { - XContentParser innerParser = XContentHelper.createParser(source); - parseContext.parser(innerParser); - setPathLevel(); - try { - innerQuery = parseContext.parseInnerQueryBuilder().toQuery(this.shardContext); - } finally { - resetPathLevel(); - } - queryParsed = true; - return innerQuery; - } finally { - parseContext.parser(old); - } - } - } - public Query getInnerFilter() throws IOException { if (filterParsed) { return innerFilter; @@ -178,27 +119,12 @@ public class NestedInnerQueryParseSupport { return nestedObjectMapper; } - public boolean queryFound() { - return queryFound; - } - public boolean filterFound() { return filterFound; } - public ObjectMapper getParentObjectMapper() { - return parentObjectMapper; - } - private void setPathLevel() { - ObjectMapper objectMapper = shardContext.nestedScope().getObjectMapper(); - if (objectMapper == null) { - parentFilter = shardContext.bitsetFilter(Queries.newNonNestedFilter()); - } else { - parentFilter = shardContext.bitsetFilter(objectMapper.nestedTypeFilter()); - } - childFilter = nestedObjectMapper.nestedTypeFilter(); - parentObjectMapper = shardContext.nestedScope().nextLevel(nestedObjectMapper); + shardContext.nestedScope().nextLevel(nestedObjectMapper); } private void resetPathLevel() { diff --git a/core/src/main/java/org/elasticsearch/rest/action/cat/RestIndicesAction.java b/core/src/main/java/org/elasticsearch/rest/action/cat/RestIndicesAction.java index 77366e1cc81..398ef9c15f7 100644 --- a/core/src/main/java/org/elasticsearch/rest/action/cat/RestIndicesAction.java +++ b/core/src/main/java/org/elasticsearch/rest/action/cat/RestIndicesAction.java @@ -135,22 +135,22 @@ public class RestIndicesAction extends AbstractCatAction { table.addCell("fielddata.evictions", "sibling:pri;alias:fe,fielddataEvictions;default:false;text-align:right;desc:fielddata evictions"); table.addCell("pri.fielddata.evictions", "default:false;text-align:right;desc:fielddata evictions"); - table.addCell("query_cache.memory_size", "sibling:pri;alias:fcm,queryCacheMemory;default:false;text-align:right;desc:used query cache"); + table.addCell("query_cache.memory_size", "sibling:pri;alias:qcm,queryCacheMemory;default:false;text-align:right;desc:used query cache"); 
table.addCell("pri.query_cache.memory_size", "default:false;text-align:right;desc:used query cache"); - table.addCell("query_cache.evictions", "sibling:pri;alias:fce,queryCacheEvictions;default:false;text-align:right;desc:query cache evictions"); + table.addCell("query_cache.evictions", "sibling:pri;alias:qce,queryCacheEvictions;default:false;text-align:right;desc:query cache evictions"); table.addCell("pri.query_cache.evictions", "default:false;text-align:right;desc:query cache evictions"); - table.addCell("request_cache.memory_size", "sibling:pri;alias:qcm,queryCacheMemory;default:false;text-align:right;desc:used request cache"); + table.addCell("request_cache.memory_size", "sibling:pri;alias:rcm,requestCacheMemory;default:false;text-align:right;desc:used request cache"); table.addCell("pri.request_cache.memory_size", "default:false;text-align:right;desc:used request cache"); - table.addCell("request_cache.evictions", "sibling:pri;alias:qce,queryCacheEvictions;default:false;text-align:right;desc:request cache evictions"); + table.addCell("request_cache.evictions", "sibling:pri;alias:rce,requestCacheEvictions;default:false;text-align:right;desc:request cache evictions"); table.addCell("pri.request_cache.evictions", "default:false;text-align:right;desc:request cache evictions"); - table.addCell("request_cache.hit_count", "sibling:pri;alias:qchc,queryCacheHitCount;default:false;text-align:right;desc:request cache hit count"); + table.addCell("request_cache.hit_count", "sibling:pri;alias:rchc,requestCacheHitCount;default:false;text-align:right;desc:request cache hit count"); table.addCell("pri.request_cache.hit_count", "default:false;text-align:right;desc:request cache hit count"); - table.addCell("request_cache.miss_count", "sibling:pri;alias:qcmc,queryCacheMissCount;default:false;text-align:right;desc:request cache miss count"); + table.addCell("request_cache.miss_count", "sibling:pri;alias:rcmc,requestCacheMissCount;default:false;text-align:right;desc:request cache miss count"); table.addCell("pri.request_cache.miss_count", "default:false;text-align:right;desc:request cache miss count"); table.addCell("flush.total", "sibling:pri;alias:ft,flushTotal;default:false;text-align:right;desc:number of flushes"); diff --git a/core/src/main/java/org/elasticsearch/rest/action/cat/RestNodesAction.java b/core/src/main/java/org/elasticsearch/rest/action/cat/RestNodesAction.java index 4b97f8a942b..c7b1550cbae 100644 --- a/core/src/main/java/org/elasticsearch/rest/action/cat/RestNodesAction.java +++ b/core/src/main/java/org/elasticsearch/rest/action/cat/RestNodesAction.java @@ -150,13 +150,13 @@ public class RestNodesAction extends AbstractCatAction { table.addCell("fielddata.memory_size", "alias:fm,fielddataMemory;default:false;text-align:right;desc:used fielddata cache"); table.addCell("fielddata.evictions", "alias:fe,fielddataEvictions;default:false;text-align:right;desc:fielddata evictions"); - table.addCell("query_cache.memory_size", "alias:fcm,queryCacheMemory;default:false;text-align:right;desc:used query cache"); - table.addCell("query_cache.evictions", "alias:fce,queryCacheEvictions;default:false;text-align:right;desc:query cache evictions"); + table.addCell("query_cache.memory_size", "alias:qcm,queryCacheMemory;default:false;text-align:right;desc:used query cache"); + table.addCell("query_cache.evictions", "alias:qce,queryCacheEvictions;default:false;text-align:right;desc:query cache evictions"); - table.addCell("request_cache.memory_size", 
"alias:qcm,requestCacheMemory;default:false;text-align:right;desc:used request cache"); - table.addCell("request_cache.evictions", "alias:qce,requestCacheEvictions;default:false;text-align:right;desc:request cache evictions"); - table.addCell("request_cache.hit_count", "alias:qchc,requestCacheHitCount;default:false;text-align:right;desc:request cache hit counts"); - table.addCell("request_cache.miss_count", "alias:qcmc,requestCacheMissCount;default:false;text-align:right;desc:request cache miss counts"); + table.addCell("request_cache.memory_size", "alias:rcm,requestCacheMemory;default:false;text-align:right;desc:used request cache"); + table.addCell("request_cache.evictions", "alias:rce,requestCacheEvictions;default:false;text-align:right;desc:request cache evictions"); + table.addCell("request_cache.hit_count", "alias:rchc,requestCacheHitCount;default:false;text-align:right;desc:request cache hit counts"); + table.addCell("request_cache.miss_count", "alias:rcmc,requestCacheMissCount;default:false;text-align:right;desc:request cache miss counts"); table.addCell("flush.total", "alias:ft,flushTotal;default:false;text-align:right;desc:number of flushes"); table.addCell("flush.total_time", "alias:ftt,flushTotalTime;default:false;text-align:right;desc:time spent in flush"); diff --git a/core/src/main/java/org/elasticsearch/rest/action/cat/RestShardsAction.java b/core/src/main/java/org/elasticsearch/rest/action/cat/RestShardsAction.java index 1b3f239ae5f..8bf67653f6f 100644 --- a/core/src/main/java/org/elasticsearch/rest/action/cat/RestShardsAction.java +++ b/core/src/main/java/org/elasticsearch/rest/action/cat/RestShardsAction.java @@ -109,8 +109,8 @@ public class RestShardsAction extends AbstractCatAction { table.addCell("fielddata.memory_size", "alias:fm,fielddataMemory;default:false;text-align:right;desc:used fielddata cache"); table.addCell("fielddata.evictions", "alias:fe,fielddataEvictions;default:false;text-align:right;desc:fielddata evictions"); - table.addCell("query_cache.memory_size", "alias:fcm,queryCacheMemory;default:false;text-align:right;desc:used query cache"); - table.addCell("query_cache.evictions", "alias:fce,queryCacheEvictions;default:false;text-align:right;desc:query cache evictions"); + table.addCell("query_cache.memory_size", "alias:qcm,queryCacheMemory;default:false;text-align:right;desc:used query cache"); + table.addCell("query_cache.evictions", "alias:qce,queryCacheEvictions;default:false;text-align:right;desc:query cache evictions"); table.addCell("flush.total", "alias:ft,flushTotal;default:false;text-align:right;desc:number of flushes"); table.addCell("flush.total_time", "alias:ftt,flushTotalTime;default:false;text-align:right;desc:time spent in flush"); diff --git a/core/src/main/java/org/elasticsearch/search/SearchService.java b/core/src/main/java/org/elasticsearch/search/SearchService.java index be2e52b5aa3..a1b7f93d0d1 100644 --- a/core/src/main/java/org/elasticsearch/search/SearchService.java +++ b/core/src/main/java/org/elasticsearch/search/SearchService.java @@ -549,8 +549,14 @@ public class SearchService extends AbstractLifecycleComponent imp indexShard, scriptService, pageCacheRecycler, bigArrays, threadPool.estimatedTimeInMillisCounter(), parseFieldMatcher, defaultSearchTimeout, fetchPhase); context.getQueryShardContext().setFieldStatsProvider(new FieldStatsProvider(engineSearcher, indexService.mapperService())); - request.rewrite(context.getQueryShardContext()); SearchContext.setCurrent(context); + request.rewrite(context.getQueryShardContext()); + // 
diff --git a/core/src/main/java/org/elasticsearch/search/SearchService.java b/core/src/main/java/org/elasticsearch/search/SearchService.java index be2e52b5aa3..a1b7f93d0d1 100644 --- a/core/src/main/java/org/elasticsearch/search/SearchService.java +++ b/core/src/main/java/org/elasticsearch/search/SearchService.java @@ -549,8 +549,14 @@ public class SearchService extends AbstractLifecycleComponent imp indexShard, scriptService, pageCacheRecycler, bigArrays, threadPool.estimatedTimeInMillisCounter(), parseFieldMatcher, defaultSearchTimeout, fetchPhase); context.getQueryShardContext().setFieldStatsProvider(new FieldStatsProvider(engineSearcher, indexService.mapperService())); - request.rewrite(context.getQueryShardContext()); SearchContext.setCurrent(context); + request.rewrite(context.getQueryShardContext()); + // reset that we have used nowInMillis from the context since it may + // have been rewritten so it's no longer in the query and the request can + // be cached. If it is still present in the request (e.g. in a range + // aggregation) it will still be caught when the aggregation is + // evaluated. + context.resetNowInMillisUsed(); try { if (request.scroll() != null) { context.scrollContext(new ScrollContext());
diff --git a/core/src/main/java/org/elasticsearch/search/internal/SearchContext.java b/core/src/main/java/org/elasticsearch/search/internal/SearchContext.java index ec47c6327cf..1881109c9b6 100644 --- a/core/src/main/java/org/elasticsearch/search/internal/SearchContext.java +++ b/core/src/main/java/org/elasticsearch/search/internal/SearchContext.java @@ -149,6 +149,10 @@ public abstract class SearchContext implements Releasable { return nowInMillisUsed; } + public final void resetNowInMillisUsed() { + this.nowInMillisUsed = false; + } + protected abstract long nowInMillisImpl(); public abstract ScrollContext scrollContext();
diff --git a/core/src/main/java/org/elasticsearch/search/sort/FieldSortBuilder.java b/core/src/main/java/org/elasticsearch/search/sort/FieldSortBuilder.java index a5707ea4a53..02f39cf9490 100644 --- a/core/src/main/java/org/elasticsearch/search/sort/FieldSortBuilder.java +++ b/core/src/main/java/org/elasticsearch/search/sort/FieldSortBuilder.java @@ -19,6 +19,7 @@ package org.elasticsearch.search.sort; +import org.apache.lucene.search.SortField; import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.ParseField; import org.elasticsearch.common.ParsingException; @@ -27,8 +28,14 @@ import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.lucene.BytesRefs; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.index.fielddata.IndexFieldData; +import org.elasticsearch.index.fielddata.IndexFieldData.XFieldComparatorSource.Nested; +import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.index.query.QueryParseContext; +import org.elasticsearch.index.query.QueryShardContext; +import org.elasticsearch.index.query.QueryShardException; +import org.elasticsearch.search.MultiValueMode; import java.io.IOException; import java.util.Objects; @@ -47,6 +54,13 @@ public class FieldSortBuilder extends SortBuilder implements S public static final ParseField SORT_MODE = new ParseField("mode"); public static final ParseField UNMAPPED_TYPE = new ParseField("unmapped_type"); + /** + * special field name to sort by index order + */ + public static final String DOC_FIELD_NAME = "_doc"; + private static final SortField SORT_DOC = new SortField(null, SortField.Type.DOC); + private static final SortField SORT_DOC_REVERSE = new SortField(null, SortField.Type.DOC, true); + private final String fieldName; private Object missing; @@ -161,7 +175,7 @@ public class FieldSortBuilder extends SortBuilder implements S * TODO should the above getters and setters be deprecated/ changed in * favour of real getters and setters?
*/ - public FieldSortBuilder setNestedFilter(QueryBuilder nestedFilter) { + public FieldSortBuilder setNestedFilter(QueryBuilder nestedFilter) { this.nestedFilter = nestedFilter; return this; } @@ -170,7 +184,7 @@ public class FieldSortBuilder extends SortBuilder implements S * Returns the nested filter that the nested objects should match with in * order to be taken into account for sorting. */ - public QueryBuilder getNestedFilter() { + public QueryBuilder getNestedFilter() { return this.nestedFilter; } @@ -219,6 +233,49 @@ public class FieldSortBuilder extends SortBuilder implements S return builder; } + @Override + public SortField build(QueryShardContext context) throws IOException { + if (DOC_FIELD_NAME.equals(fieldName)) { + if (order == SortOrder.DESC) { + return SORT_DOC_REVERSE; + } else { + return SORT_DOC; + } + } else { + MappedFieldType fieldType = context.fieldMapper(fieldName); + if (fieldType == null) { + if (unmappedType != null) { + fieldType = context.getMapperService().unmappedFieldType(unmappedType); + } else { + throw new QueryShardException(context, "No mapping found for [" + fieldName + "] in order to sort on"); + } + } + + if (!fieldType.isSortable()) { + throw new QueryShardException(context, "Sorting not supported for field[" + fieldName + "]"); + } + + MultiValueMode localSortMode = null; + if (sortMode != null) { + localSortMode = MultiValueMode.fromString(sortMode.toString()); + } + + if (fieldType.isNumeric() == false && (sortMode == SortMode.SUM || sortMode == SortMode.AVG || sortMode == SortMode.MEDIAN)) { + throw new QueryShardException(context, "we only support AVG, MEDIAN and SUM on number based fields"); + } + + boolean reverse = (order == SortOrder.DESC); + if (localSortMode == null) { + localSortMode = reverse ? 
MultiValueMode.MAX : MultiValueMode.MIN; + } + + final Nested nested = resolveNested(context, nestedPath, nestedFilter); + IndexFieldData.XFieldComparatorSource fieldComparatorSource = context.getForField(fieldType) + .comparatorSource(missing, localSortMode, nested); + return new SortField(fieldType.name(), fieldComparatorSource, reverse); + } + } + @Override public boolean equals(Object other) { if (this == other) { diff --git a/core/src/main/java/org/elasticsearch/search/sort/GeoDistanceSortBuilder.java b/core/src/main/java/org/elasticsearch/search/sort/GeoDistanceSortBuilder.java index 9785a0fc240..4263e148323 100644 --- a/core/src/main/java/org/elasticsearch/search/sort/GeoDistanceSortBuilder.java +++ b/core/src/main/java/org/elasticsearch/search/sort/GeoDistanceSortBuilder.java @@ -19,8 +19,18 @@ package org.elasticsearch.search.sort; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.NumericDocValues; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.FieldComparator; +import org.apache.lucene.search.SortField; +import org.apache.lucene.util.BitSet; import org.elasticsearch.ElasticsearchParseException; +import org.elasticsearch.Version; +import org.elasticsearch.common.ParseField; +import org.elasticsearch.common.ParseFieldMatcher; import org.elasticsearch.common.geo.GeoDistance; +import org.elasticsearch.common.geo.GeoDistance.FixedSourceDistance; import org.elasticsearch.common.geo.GeoPoint; import org.elasticsearch.common.geo.GeoUtils; import org.elasticsearch.common.io.stream.StreamInput; @@ -28,8 +38,17 @@ import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.unit.DistanceUnit; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.index.fielddata.IndexFieldData; +import org.elasticsearch.index.fielddata.IndexFieldData.XFieldComparatorSource.Nested; +import org.elasticsearch.index.fielddata.IndexGeoPointFieldData; +import org.elasticsearch.index.fielddata.MultiGeoPointValues; +import org.elasticsearch.index.fielddata.NumericDoubleValues; +import org.elasticsearch.index.fielddata.SortedNumericDoubleValues; +import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.index.query.QueryParseContext; +import org.elasticsearch.index.query.QueryShardContext; +import org.elasticsearch.search.MultiValueMode; import java.io.IOException; import java.util.ArrayList; @@ -45,6 +64,14 @@ public class GeoDistanceSortBuilder extends SortBuilder public static final String NAME = "_geo_distance"; public static final boolean DEFAULT_COERCE = false; public static final boolean DEFAULT_IGNORE_MALFORMED = false; + public static final ParseField UNIT_FIELD = new ParseField("unit"); + public static final ParseField REVERSE_FIELD = new ParseField("reverse"); + public static final ParseField DISTANCE_TYPE_FIELD = new ParseField("distance_type"); + public static final ParseField COERCE_FIELD = new ParseField("coerce", "normalize"); + public static final ParseField IGNORE_MALFORMED_FIELD = new ParseField("ignore_malformed"); + public static final ParseField SORTMODE_FIELD = new ParseField("mode", "sort_mode"); + public static final ParseField NESTED_PATH_FIELD = new ParseField("nested_path"); + public static final ParseField NESTED_FILTER_FIELD = new ParseField("nested_filter"); static final GeoDistanceSortBuilder PROTOTYPE = new 
GeoDistanceSortBuilder("", -1, -1); @@ -280,22 +307,22 @@ public class GeoDistanceSortBuilder extends SortBuilder } builder.endArray(); - builder.field("unit", unit); - builder.field("distance_type", geoDistance.name().toLowerCase(Locale.ROOT)); + builder.field(UNIT_FIELD.getPreferredName(), unit); + builder.field(DISTANCE_TYPE_FIELD.getPreferredName(), geoDistance.name().toLowerCase(Locale.ROOT)); builder.field(ORDER_FIELD.getPreferredName(), order); if (sortMode != null) { - builder.field("mode", sortMode); + builder.field(SORTMODE_FIELD.getPreferredName(), sortMode); } if (nestedPath != null) { - builder.field("nested_path", nestedPath); + builder.field(NESTED_PATH_FIELD.getPreferredName(), nestedPath); } if (nestedFilter != null) { - builder.field("nested_filter", nestedFilter, params); + builder.field(NESTED_FILTER_FIELD.getPreferredName(), nestedFilter, params); } - builder.field("coerce", coerce); - builder.field("ignore_malformed", ignoreMalformed); + builder.field(COERCE_FIELD.getPreferredName(), coerce); + builder.field(IGNORE_MALFORMED_FIELD.getPreferredName(), ignoreMalformed); builder.endObject(); return builder; @@ -383,6 +410,7 @@ public class GeoDistanceSortBuilder extends SortBuilder @Override public GeoDistanceSortBuilder fromXContent(QueryParseContext context, String elementName) throws IOException { XContentParser parser = context.parser(); + ParseFieldMatcher parseFieldMatcher = context.parseFieldMatcher(); String fieldName = null; List geoPoints = new ArrayList<>(); DistanceUnit unit = DistanceUnit.DEFAULT; @@ -405,40 +433,37 @@ public class GeoDistanceSortBuilder extends SortBuilder fieldName = currentName; } else if (token == XContentParser.Token.START_OBJECT) { - // the json in the format of -> field : { lat : 30, lon : 12 } - if ("nested_filter".equals(currentName) || "nestedFilter".equals(currentName)) { - // TODO Note to remember: while this is kept as a QueryBuilder internally, - // we need to make sure to call toFilter() on it once on the shard - // (e.g. in the new build() method) + if (parseFieldMatcher.match(currentName, NESTED_FILTER_FIELD)) { nestedFilter = context.parseInnerQueryBuilder(); } else { + // the json in the format of -> field : { lat : 30, lon : 12 } fieldName = currentName; GeoPoint point = new GeoPoint(); GeoUtils.parseGeoPoint(parser, point); geoPoints.add(point); } } else if (token.isValue()) { - if ("reverse".equals(currentName)) { + if (parseFieldMatcher.match(currentName, REVERSE_FIELD)) { order = parser.booleanValue() ? 
SortOrder.DESC : SortOrder.ASC; - } else if ("order".equals(currentName)) { + } else if (parseFieldMatcher.match(currentName, ORDER_FIELD)) { order = SortOrder.fromString(parser.text()); - } else if ("unit".equals(currentName)) { + } else if (parseFieldMatcher.match(currentName, UNIT_FIELD)) { unit = DistanceUnit.fromString(parser.text()); - } else if ("distance_type".equals(currentName) || "distanceType".equals(currentName)) { + } else if (parseFieldMatcher.match(currentName, DISTANCE_TYPE_FIELD)) { geoDistance = GeoDistance.fromString(parser.text()); - } else if ("coerce".equals(currentName) || "normalize".equals(currentName)) { + } else if (parseFieldMatcher.match(currentName, COERCE_FIELD)) { coerce = parser.booleanValue(); if (coerce == true) { ignoreMalformed = true; } - } else if ("ignore_malformed".equals(currentName)) { + } else if (parseFieldMatcher.match(currentName, IGNORE_MALFORMED_FIELD)) { boolean ignore_malformed_value = parser.booleanValue(); if (coerce == false) { ignoreMalformed = ignore_malformed_value; } - } else if ("sort_mode".equals(currentName) || "sortMode".equals(currentName) || "mode".equals(currentName)) { + } else if (parseFieldMatcher.match(currentName, SORTMODE_FIELD)) { sortMode = SortMode.fromString(parser.text()); - } else if ("nested_path".equals(currentName) || "nestedPath".equals(currentName)) { + } else if (parseFieldMatcher.match(currentName, NESTED_PATH_FIELD)) { nestedPath = parser.text(); } else { GeoPoint point = new GeoPoint(); @@ -461,7 +486,85 @@ public class GeoDistanceSortBuilder extends SortBuilder result.coerce(coerce); result.ignoreMalformed(ignoreMalformed); return result; + } + @Override + public SortField build(QueryShardContext context) throws IOException { + final boolean indexCreatedBeforeV2_0 = context.indexVersionCreated().before(Version.V_2_0_0); + // validation was not available prior to 2.x, so to support bwc percolation queries we only apply ignore_malformed to indexes created on or after 2.x + List<GeoPoint> localPoints = new ArrayList<GeoPoint>(); + for (GeoPoint geoPoint : this.points) { + localPoints.add(new GeoPoint(geoPoint)); + } + + if (!indexCreatedBeforeV2_0 && !ignoreMalformed) { + for (GeoPoint point : localPoints) { + if (GeoUtils.isValidLatitude(point.lat()) == false) { + throw new ElasticsearchParseException("illegal latitude value [{}] for [GeoDistanceSort]", point.lat()); + } + if (GeoUtils.isValidLongitude(point.lon()) == false) { + throw new ElasticsearchParseException("illegal longitude value [{}] for [GeoDistanceSort]", point.lon()); + } + } + } + + if (coerce) { + for (GeoPoint point : localPoints) { + GeoUtils.normalizePoint(point, coerce, coerce); + } + } + + boolean reverse = (order == SortOrder.DESC); + final MultiValueMode finalSortMode; + if (sortMode == null) { + finalSortMode = reverse ?
MultiValueMode.MAX : MultiValueMode.MIN; + } else { + finalSortMode = MultiValueMode.fromString(sortMode.toString()); + } + + MappedFieldType fieldType = context.fieldMapper(fieldName); + if (fieldType == null) { + throw new IllegalArgumentException("failed to find mapper for [" + fieldName + "] for geo distance based sort"); + } + final IndexGeoPointFieldData geoIndexFieldData = context.getForField(fieldType); + final FixedSourceDistance[] distances = new FixedSourceDistance[localPoints.size()]; + for (int i = 0; i< localPoints.size(); i++) { + distances[i] = geoDistance.fixedSourceDistance(localPoints.get(i).lat(), localPoints.get(i).lon(), unit); + } + + final Nested nested = resolveNested(context, nestedPath, nestedFilter); + + IndexFieldData.XFieldComparatorSource geoDistanceComparatorSource = new IndexFieldData.XFieldComparatorSource() { + + @Override + public SortField.Type reducedType() { + return SortField.Type.DOUBLE; + } + + @Override + public FieldComparator newComparator(String fieldname, int numHits, int sortPos, boolean reversed) throws IOException { + return new FieldComparator.DoubleComparator(numHits, null, null) { + @Override + protected NumericDocValues getNumericDocValues(LeafReaderContext context, String field) throws IOException { + final MultiGeoPointValues geoPointValues = geoIndexFieldData.load(context).getGeoPointValues(); + final SortedNumericDoubleValues distanceValues = GeoDistance.distanceValues(geoPointValues, distances); + final NumericDoubleValues selectedValues; + if (nested == null) { + selectedValues = finalSortMode.select(distanceValues, Double.MAX_VALUE); + } else { + final BitSet rootDocs = nested.rootDocs(context); + final DocIdSetIterator innerDocs = nested.innerDocs(context); + selectedValues = finalSortMode.select(distanceValues, Double.MAX_VALUE, rootDocs, innerDocs, + context.reader().maxDoc()); + } + return selectedValues.getRawDoubleValues(); + } + }; + } + + }; + + return new SortField(fieldName, geoDistanceComparatorSource, reverse); } static void parseGeoPoints(XContentParser parser, List geoPoints) throws IOException { diff --git a/core/src/main/java/org/elasticsearch/search/sort/GeoDistanceSortParser.java b/core/src/main/java/org/elasticsearch/search/sort/GeoDistanceSortParser.java index d1eabf89e45..aff0e68fc1d 100644 --- a/core/src/main/java/org/elasticsearch/search/sort/GeoDistanceSortParser.java +++ b/core/src/main/java/org/elasticsearch/search/sort/GeoDistanceSortParser.java @@ -62,7 +62,7 @@ public class GeoDistanceSortParser implements SortParser { } @Override - public SortField parse(XContentParser parser, QueryShardContext context) throws Exception { + public SortField parse(XContentParser parser, QueryShardContext context) throws IOException { String fieldName = null; List geoPoints = new ArrayList<>(); DistanceUnit unit = DistanceUnit.DEFAULT; diff --git a/core/src/main/java/org/elasticsearch/search/sort/ScoreSortBuilder.java b/core/src/main/java/org/elasticsearch/search/sort/ScoreSortBuilder.java index 76ca56f0f9f..422be339788 100644 --- a/core/src/main/java/org/elasticsearch/search/sort/ScoreSortBuilder.java +++ b/core/src/main/java/org/elasticsearch/search/sort/ScoreSortBuilder.java @@ -19,6 +19,7 @@ package org.elasticsearch.search.sort; +import org.apache.lucene.search.SortField; import org.elasticsearch.common.ParseField; import org.elasticsearch.common.ParseFieldMatcher; import org.elasticsearch.common.ParsingException; @@ -27,6 +28,7 @@ import org.elasticsearch.common.io.stream.StreamOutput; import 
org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.index.query.QueryParseContext; +import org.elasticsearch.index.query.QueryShardContext; import java.io.IOException; import java.util.Objects; @@ -40,6 +42,8 @@ public class ScoreSortBuilder extends SortBuilder implements S static final ScoreSortBuilder PROTOTYPE = new ScoreSortBuilder(); public static final ParseField REVERSE_FIELD = new ParseField("reverse"); public static final ParseField ORDER_FIELD = new ParseField("order"); + private static final SortField SORT_SCORE = new SortField(null, SortField.Type.SCORE); + private static final SortField SORT_SCORE_REVERSE = new SortField(null, SortField.Type.SCORE, true); public ScoreSortBuilder() { // order defaults to desc when sorting on the _score @@ -84,6 +88,14 @@ public class ScoreSortBuilder extends SortBuilder implements S return result; } + public SortField build(QueryShardContext context) { + if (order == SortOrder.DESC) { + return SORT_SCORE; + } else { + return SORT_SCORE_REVERSE; + } + } + @Override public boolean equals(Object object) { if (this == object) { diff --git a/core/src/main/java/org/elasticsearch/search/sort/ScriptSortBuilder.java b/core/src/main/java/org/elasticsearch/search/sort/ScriptSortBuilder.java index e77d12ce478..6005d9354ff 100644 --- a/core/src/main/java/org/elasticsearch/search/sort/ScriptSortBuilder.java +++ b/core/src/main/java/org/elasticsearch/search/sort/ScriptSortBuilder.java @@ -19,6 +19,12 @@ package org.elasticsearch.search.sort; +import org.apache.lucene.index.BinaryDocValues; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.SortField; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.BytesRefBuilder; import org.elasticsearch.common.ParseField; import org.elasticsearch.common.ParseFieldMatcher; import org.elasticsearch.common.ParsingException; @@ -27,14 +33,29 @@ import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.io.stream.Writeable; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.index.fielddata.FieldData; +import org.elasticsearch.index.fielddata.IndexFieldData; +import org.elasticsearch.index.fielddata.IndexFieldData.XFieldComparatorSource.Nested; +import org.elasticsearch.index.fielddata.NumericDoubleValues; +import org.elasticsearch.index.fielddata.SortedBinaryDocValues; +import org.elasticsearch.index.fielddata.SortedNumericDoubleValues; +import org.elasticsearch.index.fielddata.fieldcomparator.BytesRefFieldComparatorSource; +import org.elasticsearch.index.fielddata.fieldcomparator.DoubleValuesComparatorSource; import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.index.query.QueryParseContext; +import org.elasticsearch.index.query.QueryShardContext; +import org.elasticsearch.index.query.QueryShardException; +import org.elasticsearch.script.LeafSearchScript; import org.elasticsearch.script.Script; import org.elasticsearch.script.Script.ScriptField; +import org.elasticsearch.script.ScriptContext; import org.elasticsearch.script.ScriptParameterParser; import org.elasticsearch.script.ScriptParameterParser.ScriptParameterValue; +import org.elasticsearch.script.SearchScript; +import org.elasticsearch.search.MultiValueMode; import java.io.IOException; +import java.util.Collections; import java.util.HashMap; import 
java.util.Locale; import java.util.Map; @@ -56,7 +77,7 @@ public class ScriptSortBuilder extends SortBuilder implements private final Script script; - private ScriptSortType type; + private final ScriptSortType type; private SortMode sortMode; @@ -104,11 +125,15 @@ public class ScriptSortBuilder extends SortBuilder implements } /** - * Defines which distance to use for sorting in the case a document contains multiple geo points. - * Possible values: min and max + * Defines which value to use for sorting in the case a document contains multiple values.
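+ * (For example, {@link SortMode#MIN} sorts by the smallest of the values produced for a document.)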
+ * For {@link ScriptSortType#STRING}, the set of possible values is restricted to {@link SortMode#MIN} and {@link SortMode#MAX} */ public ScriptSortBuilder sortMode(SortMode sortMode) { Objects.requireNonNull(sortMode, "sort mode cannot be null."); + if (ScriptSortType.STRING.equals(type) && (sortMode == SortMode.SUM || sortMode == SortMode.AVG || + sortMode == SortMode.MEDIAN)) { + throw new IllegalArgumentException("script sort of type [string] doesn't support mode [" + sortMode + "]"); + } this.sortMode = sortMode; return this; } @@ -244,6 +269,75 @@ public class ScriptSortBuilder extends SortBuilder implements return result; } + + @Override + public SortField build(QueryShardContext context) throws IOException { + final SearchScript searchScript = context.getScriptService().search( + context.lookup(), script, ScriptContext.Standard.SEARCH, Collections.emptyMap()); + + MultiValueMode valueMode = null; + if (sortMode != null) { + valueMode = MultiValueMode.fromString(sortMode.toString()); + } + boolean reverse = (order == SortOrder.DESC); + if (valueMode == null) { + valueMode = reverse ? MultiValueMode.MAX : MultiValueMode.MIN; + } + + final Nested nested = resolveNested(context, nestedPath, nestedFilter); + final IndexFieldData.XFieldComparatorSource fieldComparatorSource; + switch (type) { + case STRING: + fieldComparatorSource = new BytesRefFieldComparatorSource(null, null, valueMode, nested) { + LeafSearchScript leafScript; + @Override + protected SortedBinaryDocValues getValues(LeafReaderContext context) throws IOException { + leafScript = searchScript.getLeafSearchScript(context); + final BinaryDocValues values = new BinaryDocValues() { + final BytesRefBuilder spare = new BytesRefBuilder(); + @Override + public BytesRef get(int docID) { + leafScript.setDocument(docID); + spare.copyChars(leafScript.run().toString()); + return spare.get(); + } + }; + return FieldData.singleton(values, null); + } + @Override + protected void setScorer(Scorer scorer) { + leafScript.setScorer(scorer); + } + }; + break; + case NUMBER: + fieldComparatorSource = new DoubleValuesComparatorSource(null, Double.MAX_VALUE, valueMode, nested) { + LeafSearchScript leafScript; + @Override + protected SortedNumericDoubleValues getValues(LeafReaderContext context) throws IOException { + leafScript = searchScript.getLeafSearchScript(context); + final NumericDoubleValues values = new NumericDoubleValues() { + @Override + public double get(int docID) { + leafScript.setDocument(docID); + return leafScript.runAsDouble(); + } + }; + return FieldData.singleton(values, null); + } + @Override + protected void setScorer(Scorer scorer) { + leafScript.setScorer(scorer); + } + }; + break; + default: + throw new QueryShardException(context, "custom script sort type [" + type + "] not supported"); + } + + return new SortField("_script", fieldComparatorSource, reverse); + } + @Override public boolean equals(Object object) { if (this == object) { diff --git a/core/src/main/java/org/elasticsearch/search/sort/ScriptSortParser.java b/core/src/main/java/org/elasticsearch/search/sort/ScriptSortParser.java index c238ad6ccaf..af8531da87c 100644 --- a/core/src/main/java/org/elasticsearch/search/sort/ScriptSortParser.java +++ b/core/src/main/java/org/elasticsearch/search/sort/ScriptSortParser.java @@ -66,7 +66,7 @@ public class ScriptSortParser implements SortParser { } @Override - public SortField parse(XContentParser parser, QueryShardContext context) throws Exception { + public SortField parse(XContentParser parser, QueryShardContext 
context) throws IOException { ScriptParameterParser scriptParameterParser = new ScriptParameterParser(); Script script = null; ScriptSortType type = null; @@ -140,7 +140,6 @@ public class ScriptSortParser implements SortParser { sortMode = reverse ? MultiValueMode.MAX : MultiValueMode.MIN; } - // If nested_path is specified, then wrap the `fieldComparatorSource` in a `NestedFieldComparatorSource` final Nested nested; if (nestedHelper != null && nestedHelper.getPath() != null) { BitSetProducer rootDocumentsFilter = context.bitsetFilter(Queries.newNonNestedFilter()); @@ -182,7 +181,6 @@ public class ScriptSortParser implements SortParser { }; break; case NUMBER: - // TODO: should we rather sort missing values last? fieldComparatorSource = new DoubleValuesComparatorSource(null, Double.MAX_VALUE, sortMode, nested) { LeafSearchScript leafScript; @Override diff --git a/core/src/main/java/org/elasticsearch/search/sort/SortBuilder.java b/core/src/main/java/org/elasticsearch/search/sort/SortBuilder.java index 7852af4e97e..35d59de011c 100644 --- a/core/src/main/java/org/elasticsearch/search/sort/SortBuilder.java +++ b/core/src/main/java/org/elasticsearch/search/sort/SortBuilder.java @@ -19,12 +19,21 @@ package org.elasticsearch.search.sort; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.join.BitSetProducer; import org.elasticsearch.ElasticsearchException; import org.elasticsearch.common.ParseField; +import org.elasticsearch.common.lucene.search.Queries; import org.elasticsearch.common.xcontent.ToXContent; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentFactory; +import org.elasticsearch.index.fielddata.IndexFieldData.XFieldComparatorSource.Nested; +import org.elasticsearch.index.mapper.object.ObjectMapper; +import org.elasticsearch.index.query.QueryBuilder; +import org.elasticsearch.index.query.QueryShardContext; +import org.elasticsearch.index.query.QueryShardException; +import java.io.IOException; import java.util.Objects; /** @@ -32,6 +41,30 @@ import java.util.Objects; */ public abstract class SortBuilder> implements ToXContent { + protected static Nested resolveNested(QueryShardContext context, String nestedPath, QueryBuilder nestedFilter) throws IOException { + Nested nested = null; + if (nestedPath != null) { + BitSetProducer rootDocumentsFilter = context.bitsetFilter(Queries.newNonNestedFilter()); + ObjectMapper nestedObjectMapper = context.getObjectMapper(nestedPath); + if (nestedObjectMapper == null) { + throw new QueryShardException(context, "[nested] failed to find nested object under path [" + nestedPath + "]"); + } + if (!nestedObjectMapper.nested().isNested()) { + throw new QueryShardException(context, "[nested] nested object under path [" + nestedPath + "] is not of nested type"); + } + Query innerDocumentsQuery; + if (nestedFilter != null) { + context.nestedScope().nextLevel(nestedObjectMapper); + innerDocumentsQuery = QueryBuilder.rewriteQuery(nestedFilter, context).toFilter(context); + context.nestedScope().previousLevel(); + } else { + innerDocumentsQuery = nestedObjectMapper.nestedTypeFilter(); + } + nested = new Nested(rootDocumentsFilter, innerDocumentsQuery); + } + return nested; + } + protected SortOrder order = SortOrder.ASC; public static final ParseField ORDER_FIELD = new ParseField("order"); diff --git a/core/src/main/java/org/elasticsearch/search/sort/SortBuilderParser.java b/core/src/main/java/org/elasticsearch/search/sort/SortBuilderParser.java index 90d54a50121..706fa7863f0 100644 
--- a/core/src/main/java/org/elasticsearch/search/sort/SortBuilderParser.java +++ b/core/src/main/java/org/elasticsearch/search/sort/SortBuilderParser.java @@ -19,9 +19,11 @@ package org.elasticsearch.search.sort; +import org.apache.lucene.search.SortField; import org.elasticsearch.common.io.stream.NamedWriteable; import org.elasticsearch.common.xcontent.ToXContent; import org.elasticsearch.index.query.QueryParseContext; +import org.elasticsearch.index.query.QueryShardContext; import java.io.IOException; @@ -36,5 +38,10 @@ public interface SortBuilderParser extends NamedWriteable< * call * @return the new item */ - SortBuilder fromXContent(QueryParseContext context, String elementName) throws IOException; + T fromXContent(QueryParseContext context, String elementName) throws IOException; + + /** + * Create a {@link SortField} from this builder. + */ + SortField build(QueryShardContext context) throws IOException; } diff --git a/core/src/main/java/org/elasticsearch/search/sort/SortParseElement.java b/core/src/main/java/org/elasticsearch/search/sort/SortParseElement.java index fe0b62022fe..1ed2a457a5f 100644 --- a/core/src/main/java/org/elasticsearch/search/sort/SortParseElement.java +++ b/core/src/main/java/org/elasticsearch/search/sort/SortParseElement.java @@ -30,10 +30,11 @@ import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.index.fielddata.IndexFieldData; import org.elasticsearch.index.fielddata.IndexFieldData.XFieldComparatorSource.Nested; import org.elasticsearch.index.mapper.MappedFieldType; +import org.elasticsearch.index.query.QueryShardContext; +import org.elasticsearch.index.query.QueryShardException; import org.elasticsearch.index.query.support.NestedInnerQueryParseSupport; import org.elasticsearch.search.MultiValueMode; import org.elasticsearch.search.SearchParseElement; -import org.elasticsearch.search.SearchParseException; import org.elasticsearch.search.internal.SearchContext; import java.io.IOException; @@ -49,7 +50,7 @@ import static java.util.Collections.unmodifiableMap; */ public class SortParseElement implements SearchParseElement { - public static final SortField SORT_SCORE = new SortField(null, SortField.Type.SCORE); + private static final SortField SORT_SCORE = new SortField(null, SortField.Type.SCORE); private static final SortField SORT_SCORE_REVERSE = new SortField(null, SortField.Type.SCORE, true); private static final SortField SORT_DOC = new SortField(null, SortField.Type.DOC); private static final SortField SORT_DOC_REVERSE = new SortField(null, SortField.Type.DOC, true); @@ -75,26 +76,8 @@ public class SortParseElement implements SearchParseElement { } @Override - public void parse(XContentParser parser, SearchContext context) throws Exception { - XContentParser.Token token = parser.currentToken(); - List<SortField> sortFields = new ArrayList<>(2); - if (token == XContentParser.Token.START_ARRAY) { - while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) { - if (token == XContentParser.Token.START_OBJECT) { - addCompoundSortField(parser, context, sortFields); - } else if (token == XContentParser.Token.VALUE_STRING) { - addSortField(context, sortFields, parser.text(), false, null, null, null, null); - } else { - throw new IllegalArgumentException("malformed sort format, within the sort array, an object, or an actual string are allowed"); - } - } - } else if (token == XContentParser.Token.VALUE_STRING) { - addSortField(context, sortFields, parser.text(), false, null, null, null, null); - } else if (token ==
XContentParser.Token.START_OBJECT) { - addCompoundSortField(parser, context, sortFields); - } else { - throw new IllegalArgumentException("malformed sort format, either start with array, object, or an actual string"); - } + public void parse(XContentParser parser, SearchContext context) throws IOException { + List sortFields = parse(parser, context.getQueryShardContext()); if (!sortFields.isEmpty()) { // optimize if we just sort on score non reversed, we don't really need sorting boolean sort; @@ -114,7 +97,30 @@ public class SortParseElement implements SearchParseElement { } } - private void addCompoundSortField(XContentParser parser, SearchContext context, List sortFields) throws Exception { + List parse(XContentParser parser, QueryShardContext context) throws IOException { + XContentParser.Token token = parser.currentToken(); + List sortFields = new ArrayList<>(2); + if (token == XContentParser.Token.START_ARRAY) { + while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) { + if (token == XContentParser.Token.START_OBJECT) { + addCompoundSortField(parser, context, sortFields); + } else if (token == XContentParser.Token.VALUE_STRING) { + addSortField(context, sortFields, parser.text(), false, null, null, null, null); + } else { + throw new IllegalArgumentException("malformed sort format, within the sort array, an object, or an actual string are allowed"); + } + } + } else if (token == XContentParser.Token.VALUE_STRING) { + addSortField(context, sortFields, parser.text(), false, null, null, null, null); + } else if (token == XContentParser.Token.START_OBJECT) { + addCompoundSortField(parser, context, sortFields); + } else { + throw new IllegalArgumentException("malformed sort format, either start with array, object, or an actual string"); + } + return sortFields; + } + + private void addCompoundSortField(XContentParser parser, QueryShardContext context, List sortFields) throws IOException { XContentParser.Token token; while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { if (token == XContentParser.Token.FIELD_NAME) { @@ -138,7 +144,7 @@ public class SortParseElement implements SearchParseElement { addSortField(context, sortFields, fieldName, reverse, unmappedType, missing, sortMode, nestedFilterParseHelper); } else { if (PARSERS.containsKey(fieldName)) { - sortFields.add(PARSERS.get(fieldName).parse(parser, context.getQueryShardContext())); + sortFields.add(PARSERS.get(fieldName).parse(parser, context)); } else { while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { if (token == XContentParser.Token.FIELD_NAME) { @@ -160,7 +166,7 @@ public class SortParseElement implements SearchParseElement { sortMode = MultiValueMode.fromString(parser.text()); } else if ("nested_path".equals(innerJsonName) || "nestedPath".equals(innerJsonName)) { if (nestedFilterParseHelper == null) { - nestedFilterParseHelper = new NestedInnerQueryParseSupport(parser, context.getQueryShardContext()); + nestedFilterParseHelper = new NestedInnerQueryParseSupport(parser, context); } nestedFilterParseHelper.setPath(parser.text()); } else { @@ -169,7 +175,7 @@ public class SortParseElement implements SearchParseElement { } else if (token == XContentParser.Token.START_OBJECT) { if ("nested_filter".equals(innerJsonName) || "nestedFilter".equals(innerJsonName)) { if (nestedFilterParseHelper == null) { - nestedFilterParseHelper = new NestedInnerQueryParseSupport(parser, context.getQueryShardContext()); + nestedFilterParseHelper = new NestedInnerQueryParseSupport(parser, 
context); } nestedFilterParseHelper.filter(); } else { @@ -184,7 +190,7 @@ public class SortParseElement implements SearchParseElement { } } - private void addSortField(SearchContext context, List sortFields, String fieldName, boolean reverse, String unmappedType, @Nullable final String missing, MultiValueMode sortMode, NestedInnerQueryParseSupport nestedHelper) throws IOException { + private void addSortField(QueryShardContext context, List sortFields, String fieldName, boolean reverse, String unmappedType, @Nullable final String missing, MultiValueMode sortMode, NestedInnerQueryParseSupport nestedHelper) throws IOException { if (SCORE_FIELD_NAME.equals(fieldName)) { if (reverse) { sortFields.add(SORT_SCORE_REVERSE); @@ -198,28 +204,19 @@ public class SortParseElement implements SearchParseElement { sortFields.add(SORT_DOC); } } else { - MappedFieldType fieldType = context.smartNameFieldType(fieldName); + MappedFieldType fieldType = context.fieldMapper(fieldName); if (fieldType == null) { if (unmappedType != null) { - fieldType = context.mapperService().unmappedFieldType(unmappedType); + fieldType = context.getMapperService().unmappedFieldType(unmappedType); } else { - throw new SearchParseException(context, "No mapping found for [" + fieldName + "] in order to sort on", null); + throw new QueryShardException(context, "No mapping found for [" + fieldName + "] in order to sort on"); } } if (!fieldType.isSortable()) { - throw new SearchParseException(context, "Sorting not supported for field[" + fieldName + "]", null); + throw new QueryShardException(context, "Sorting not supported for field[" + fieldName + "]"); } - // Enable when we also know how to detect fields that do tokenize, but only emit one token - /*if (fieldMapper instanceof StringFieldMapper) { - StringFieldMapper stringFieldMapper = (StringFieldMapper) fieldMapper; - if (stringFieldMapper.fieldType().tokenized()) { - // Fail early - throw new SearchParseException(context, "Can't sort on tokenized string field[" + fieldName + "]"); - } - }*/ - // We only support AVG and SUM on number based fields if (fieldType.isNumeric() == false && (sortMode == MultiValueMode.SUM || sortMode == MultiValueMode.AVG)) { sortMode = null; @@ -230,7 +227,7 @@ public class SortParseElement implements SearchParseElement { final Nested nested; if (nestedHelper != null && nestedHelper.getPath() != null) { - BitSetProducer rootDocumentsFilter = context.bitsetFilterCache().getBitSetProducer(Queries.newNonNestedFilter()); + BitSetProducer rootDocumentsFilter = context.bitsetFilter(Queries.newNonNestedFilter()); Query innerDocumentsQuery; if (nestedHelper.filterFound()) { innerDocumentsQuery = nestedHelper.getInnerFilter(); @@ -242,7 +239,7 @@ public class SortParseElement implements SearchParseElement { nested = null; } - IndexFieldData.XFieldComparatorSource fieldComparatorSource = context.fieldData().getForField(fieldType) + IndexFieldData.XFieldComparatorSource fieldComparatorSource = context.getForField(fieldType) .comparatorSource(missing, sortMode, nested); sortFields.add(new SortField(fieldType.name(), fieldComparatorSource, reverse)); } diff --git a/core/src/main/java/org/elasticsearch/search/sort/SortParser.java b/core/src/main/java/org/elasticsearch/search/sort/SortParser.java index 727e576a85e..519d9adb957 100644 --- a/core/src/main/java/org/elasticsearch/search/sort/SortParser.java +++ b/core/src/main/java/org/elasticsearch/search/sort/SortParser.java @@ -23,6 +23,8 @@ import org.apache.lucene.search.SortField; import 
org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.index.query.QueryShardContext; +import java.io.IOException; + /** * */ @@ -30,5 +32,5 @@ public interface SortParser { String[] names(); - SortField parse(XContentParser parser, QueryShardContext context) throws Exception; + SortField parse(XContentParser parser, QueryShardContext context) throws IOException; } diff --git a/core/src/test/java/org/elasticsearch/bootstrap/BootstrapCheckTests.java b/core/src/test/java/org/elasticsearch/bootstrap/BootstrapCheckTests.java index 3c269c39004..3e5dc892889 100644 --- a/core/src/test/java/org/elasticsearch/bootstrap/BootstrapCheckTests.java +++ b/core/src/test/java/org/elasticsearch/bootstrap/BootstrapCheckTests.java @@ -157,6 +157,33 @@ public class BootstrapCheckTests extends ESTestCase { BootstrapCheck.check(true, Collections.singletonList(check)); } + public void testMaxSizeVirtualMemory() { + final long limit = JNACLibrary.RLIM_INFINITY; + final AtomicLong maxSizeVirtualMemory = new AtomicLong(randomInt()); + final BootstrapCheck.MaxSizeVirtualMemoryCheck check = new BootstrapCheck.MaxSizeVirtualMemoryCheck() { + @Override + long getMaxSizeVirtualMemory() { + return maxSizeVirtualMemory.get(); + } + }; + + try { + BootstrapCheck.check(true, Collections.singletonList(check)); + fail("should have failed due to max size virtual memory too low"); + } catch (final RuntimeException e) { + assertThat(e.getMessage(), containsString("max size virtual memory")); + } + + maxSizeVirtualMemory.set(limit); + + BootstrapCheck.check(true, Collections.singletonList(check)); + + // nothing should happen if max size virtual memory is not + // available + maxSizeVirtualMemory.set(Long.MIN_VALUE); + BootstrapCheck.check(true, Collections.singletonList(check)); + } + public void testEnforceLimits() { final Set enforceSettings = BootstrapCheck.enforceSettings(); final Setting setting = randomFrom(Arrays.asList(enforceSettings.toArray(new Setting[enforceSettings.size()]))); diff --git a/core/src/test/java/org/elasticsearch/gateway/GatewayIndexStateIT.java b/core/src/test/java/org/elasticsearch/gateway/GatewayIndexStateIT.java index af2894833cb..55955ba269d 100644 --- a/core/src/test/java/org/elasticsearch/gateway/GatewayIndexStateIT.java +++ b/core/src/test/java/org/elasticsearch/gateway/GatewayIndexStateIT.java @@ -28,6 +28,7 @@ import org.elasticsearch.client.Requests; import org.elasticsearch.cluster.ClusterState; import org.elasticsearch.cluster.metadata.IndexMetaData; import org.elasticsearch.cluster.metadata.MappingMetaData; +import org.elasticsearch.cluster.metadata.MetaData; import org.elasticsearch.cluster.routing.ShardRoutingState; import org.elasticsearch.common.Priority; import org.elasticsearch.common.logging.ESLogger; @@ -35,6 +36,7 @@ import org.elasticsearch.common.logging.Loggers; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.unit.TimeValue; import org.elasticsearch.common.xcontent.XContentFactory; +import org.elasticsearch.discovery.zen.elect.ElectMasterService; import org.elasticsearch.env.NodeEnvironment; import org.elasticsearch.index.IndexService; import org.elasticsearch.index.mapper.MapperParsingException; @@ -46,6 +48,8 @@ import org.elasticsearch.test.ESIntegTestCase.ClusterScope; import org.elasticsearch.test.ESIntegTestCase.Scope; import org.elasticsearch.test.InternalTestCluster.RestartCallback; +import java.io.IOException; + import static org.elasticsearch.common.settings.Settings.settingsBuilder; import static 
org.elasticsearch.index.query.QueryBuilders.matchAllQuery; import static org.elasticsearch.index.query.QueryBuilders.matchQuery; @@ -65,14 +69,16 @@ public class GatewayIndexStateIT extends ESIntegTestCase { logger.info("--> creating test index, with meta routing"); client().admin().indices().prepareCreate("test") - .addMapping("type1", XContentFactory.jsonBuilder().startObject().startObject("type1").startObject("_routing").field("required", true).endObject().endObject().endObject()) + .addMapping("type1", XContentFactory.jsonBuilder().startObject().startObject("type1").startObject("_routing") + .field("required", true).endObject().endObject().endObject()) .execute().actionGet(); logger.info("--> waiting for yellow status"); ensureYellow(); logger.info("--> verify meta _routing required exists"); - MappingMetaData mappingMd = client().admin().cluster().prepareState().execute().actionGet().getState().metaData().index("test").mapping("type1"); + MappingMetaData mappingMd = client().admin().cluster().prepareState().execute().actionGet().getState().metaData() + .index("test").mapping("type1"); assertThat(mappingMd.routing().required(), equalTo(true)); logger.info("--> restarting nodes..."); @@ -101,7 +107,8 @@ public class GatewayIndexStateIT extends ESIntegTestCase { ClusterStateResponse stateResponse = client().admin().cluster().prepareState().execute().actionGet(); assertThat(stateResponse.getState().metaData().index("test").getState(), equalTo(IndexMetaData.State.OPEN)); assertThat(stateResponse.getState().routingTable().index("test").shards().size(), equalTo(test.numPrimaries)); - assertThat(stateResponse.getState().routingTable().index("test").shardsWithState(ShardRoutingState.STARTED).size(), equalTo(test.totalNumShards)); + assertThat(stateResponse.getState().routingTable().index("test").shardsWithState(ShardRoutingState.STARTED).size(), + equalTo(test.totalNumShards)); logger.info("--> indexing a simple document"); client().prepareIndex("test", "type1", "1").setSource("field1", "value1").get(); @@ -138,7 +145,8 @@ public class GatewayIndexStateIT extends ESIntegTestCase { stateResponse = client().admin().cluster().prepareState().execute().actionGet(); assertThat(stateResponse.getState().metaData().index("test").getState(), equalTo(IndexMetaData.State.OPEN)); assertThat(stateResponse.getState().routingTable().index("test").shards().size(), equalTo(test.numPrimaries)); - assertThat(stateResponse.getState().routingTable().index("test").shardsWithState(ShardRoutingState.STARTED).size(), equalTo(test.totalNumShards)); + assertThat(stateResponse.getState().routingTable().index("test").shardsWithState(ShardRoutingState.STARTED).size(), + equalTo(test.totalNumShards)); logger.info("--> trying to get the indexed document on the first index"); GetResponse getResponse = client().prepareGet("test", "type1", "1").execute().actionGet(); @@ -176,7 +184,8 @@ public class GatewayIndexStateIT extends ESIntegTestCase { stateResponse = client().admin().cluster().prepareState().execute().actionGet(); assertThat(stateResponse.getState().metaData().index("test").getState(), equalTo(IndexMetaData.State.OPEN)); assertThat(stateResponse.getState().routingTable().index("test").shards().size(), equalTo(test.numPrimaries)); - assertThat(stateResponse.getState().routingTable().index("test").shardsWithState(ShardRoutingState.STARTED).size(), equalTo(test.totalNumShards)); + assertThat(stateResponse.getState().routingTable().index("test").shardsWithState(ShardRoutingState.STARTED).size(), + 
equalTo(test.totalNumShards)); logger.info("--> trying to get the indexed document on the first round (before close and shutdown)"); getResponse = client().prepareGet("test", "type1", "1").execute().actionGet(); @@ -202,7 +211,8 @@ public class GatewayIndexStateIT extends ESIntegTestCase { internalCluster().startNode(settingsBuilder().put(Node.NODE_DATA_SETTING.getKey(), false).build()); logger.info("--> waiting for test index to be created"); - ClusterHealthResponse health = client().admin().cluster().prepareHealth().setWaitForEvents(Priority.LANGUID).setIndices("test").execute().actionGet(); + ClusterHealthResponse health = client().admin().cluster().prepareHealth().setWaitForEvents(Priority.LANGUID).setIndices("test") + .execute().actionGet(); assertThat(health.isTimedOut(), equalTo(false)); logger.info("--> verify we have an index"); @@ -236,7 +246,8 @@ public class GatewayIndexStateIT extends ESIntegTestCase { client().prepareIndex("test", "type1", "1").setSource("field1", "value1").setRefresh(true).execute().actionGet(); logger.info("--> waiting for green status"); - ClusterHealthResponse health = client().admin().cluster().prepareHealth().setWaitForEvents(Priority.LANGUID).setWaitForGreenStatus().setWaitForNodes("2").execute().actionGet(); + ClusterHealthResponse health = client().admin().cluster().prepareHealth().setWaitForEvents(Priority.LANGUID).setWaitForGreenStatus() + .setWaitForNodes("2").execute().actionGet(); assertThat(health.isTimedOut(), equalTo(false)); logger.info("--> verify 1 doc in the index"); @@ -255,7 +266,8 @@ public class GatewayIndexStateIT extends ESIntegTestCase { client().admin().indices().prepareOpen("test").execute().actionGet(); logger.info("--> waiting for green status"); - health = client().admin().cluster().prepareHealth().setWaitForEvents(Priority.LANGUID).setWaitForGreenStatus().setWaitForNodes("2").execute().actionGet(); + health = client().admin().cluster().prepareHealth().setWaitForEvents(Priority.LANGUID).setWaitForGreenStatus().setWaitForNodes("2") + .execute().actionGet(); assertThat(health.isTimedOut(), equalTo(false)); logger.info("--> verify 1 doc in the index"); @@ -300,7 +312,8 @@ public class GatewayIndexStateIT extends ESIntegTestCase { ensureGreen(); // make sure that any other events were processed - assertFalse(client().admin().cluster().prepareHealth().setWaitForRelocatingShards(0).setWaitForEvents(Priority.LANGUID).get().isTimedOut()); + assertFalse(client().admin().cluster().prepareHealth().setWaitForRelocatingShards(0).setWaitForEvents(Priority.LANGUID).get() + .isTimedOut()); logger.info("--> verify we read the right thing through alias"); assertThat(client().prepareGet("test", "type1", "2").execute().actionGet().isExists(), equalTo(true)); @@ -492,4 +505,44 @@ public class GatewayIndexStateIT extends ESIntegTestCase { logger.info("--> verify 1 doc in the index"); assertHitCount(client().prepareSearch().setQuery(matchQuery("field1", "value one")).get(), 1L); } + + public void testArchiveBrokenClusterSettings() throws Exception { + logger.info("--> starting one node"); + internalCluster().startNode(); + client().prepareIndex("test", "type1", "1").setSource("field1", "value1").setRefresh(true).execute().actionGet(); + logger.info("--> waiting for green status"); + if (usually()) { + ensureYellow(); + } else { + internalCluster().startNode(); + client().admin().cluster() + .health(Requests.clusterHealthRequest() + .waitForGreenStatus() + .waitForEvents(Priority.LANGUID) + .waitForRelocatingShards(0).waitForNodes("2")).actionGet(); 
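+ // (editorial note, not part of the original patch: with a second node we wait for green so every shard copy is allocated before the on-disk cluster state is deliberately broken below)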
+ } + ClusterState state = client().admin().cluster().prepareState().get().getState(); + MetaData metaData = state.getMetaData(); + for (NodeEnvironment nodeEnv : internalCluster().getInstances(NodeEnvironment.class)) { + MetaData brokenMeta = MetaData.builder(metaData).persistentSettings(Settings.builder() + .put(metaData.persistentSettings()).put("this.is.unknown", true) + .put(ElectMasterService.DISCOVERY_ZEN_MINIMUM_MASTER_NODES_SETTING.getKey(), "broken").build()).build(); + MetaData.FORMAT.write(brokenMeta, metaData.version(), nodeEnv.nodeDataPaths()); + } + internalCluster().fullRestart(); + ensureYellow("test"); // wait for state recovery + state = client().admin().cluster().prepareState().get().getState(); + assertEquals("true", state.metaData().persistentSettings().get("archived.this.is.unknown")); + assertEquals("broken", state.metaData().persistentSettings().get("archived." + + ElectMasterService.DISCOVERY_ZEN_MINIMUM_MASTER_NODES_SETTING.getKey())); + + // delete these settings + client().admin().cluster().prepareUpdateSettings().setPersistentSettings(Settings.builder().putNull("archived.*")).get(); + + state = client().admin().cluster().prepareState().get().getState(); + assertNull(state.metaData().persistentSettings().get("archived.this.is.unknown")); + assertNull(state.metaData().persistentSettings().get("archived." + + ElectMasterService.DISCOVERY_ZEN_MINIMUM_MASTER_NODES_SETTING.getKey())); + assertHitCount(client().prepareSearch().setQuery(matchAllQuery()).get(), 1L); + } } diff --git a/core/src/test/java/org/elasticsearch/index/IndexSettingsTests.java b/core/src/test/java/org/elasticsearch/index/IndexSettingsTests.java index 46d99e3b4bc..9d74b1ce429 100644 --- a/core/src/test/java/org/elasticsearch/index/IndexSettingsTests.java +++ b/core/src/test/java/org/elasticsearch/index/IndexSettingsTests.java @@ -20,6 +20,7 @@ package org.elasticsearch.index; import org.elasticsearch.Version; import org.elasticsearch.cluster.metadata.IndexMetaData; +import org.elasticsearch.cluster.metadata.MetaDataIndexUpgradeService; import org.elasticsearch.common.regex.Regex; import org.elasticsearch.common.settings.IndexScopedSettings; import org.elasticsearch.common.settings.Setting; @@ -28,10 +29,12 @@ import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.unit.ByteSizeValue; import org.elasticsearch.common.unit.TimeValue; import org.elasticsearch.index.translog.Translog; +import org.elasticsearch.indices.mapper.MapperRegistry; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.test.VersionUtils; import java.util.Arrays; +import java.util.Collections; import java.util.HashSet; import java.util.Set; import java.util.concurrent.TimeUnit; @@ -40,10 +43,10 @@ import java.util.function.Function; public class IndexSettingsTests extends ESTestCase { - public void testRunListener() { Version version = VersionUtils.getPreviousVersion(); - Settings theSettings = Settings.settingsBuilder().put(IndexMetaData.SETTING_VERSION_CREATED, version).put(IndexMetaData.SETTING_INDEX_UUID, "0xdeadbeef").build(); + Settings theSettings = Settings.settingsBuilder().put(IndexMetaData.SETTING_VERSION_CREATED, version) + .put(IndexMetaData.SETTING_INDEX_UUID, "0xdeadbeef").build(); final AtomicInteger integer = new AtomicInteger(0); Setting integerSetting = Setting.intSetting("index.test.setting.int", -1, Property.Dynamic, Property.IndexScope); @@ -57,7 +60,8 @@ public class IndexSettingsTests extends ESTestCase { assertFalse(settings.updateIndexMetaData(metaData)); 
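// (editorial note: the no-op update above must not fire the settings listener and must leave the settings unchanged, which the following assertions verify)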
assertEquals(metaData.getSettings().getAsMap(), settings.getSettings().getAsMap()); assertEquals(0, integer.get()); - assertTrue(settings.updateIndexMetaData(newIndexMeta("index", Settings.builder().put(theSettings).put("index.test.setting.int", 42).build()))); + assertTrue(settings.updateIndexMetaData(newIndexMeta("index", Settings.builder().put(theSettings).put("index.test.setting.int", 42) + .build()))); assertEquals(42, integer.get()); } @@ -77,13 +81,15 @@ public class IndexSettingsTests extends ESTestCase { settings.getScopedSettings().addSettingsUpdateConsumer(notUpdated, builder::append); assertEquals(0, integer.get()); assertEquals("", builder.toString()); - IndexMetaData newMetaData = newIndexMeta("index", Settings.builder().put(settings.getIndexMetaData().getSettings()).put("index.test.setting.int", 42).build()); + IndexMetaData newMetaData = newIndexMeta("index", Settings.builder().put(settings.getIndexMetaData().getSettings()) + .put("index.test.setting.int", 42).build()); assertTrue(settings.updateIndexMetaData(newMetaData)); assertSame(settings.getIndexMetaData(), newMetaData); assertEquals(42, integer.get()); assertEquals("", builder.toString()); integer.set(0); - assertTrue(settings.updateIndexMetaData(newIndexMeta("index", Settings.builder().put(settings.getIndexMetaData().getSettings()).put("index.not.updated", "boom").build()))); + assertTrue(settings.updateIndexMetaData(newIndexMeta("index", Settings.builder().put(settings.getIndexMetaData().getSettings()) + .put("index.not.updated", "boom").build()))); assertEquals("boom", builder.toString()); assertEquals("not updated - we preserve the old settings", 0, integer.get()); @@ -91,21 +97,25 @@ public class IndexSettingsTests extends ESTestCase { public void testSettingsConsistency() { Version version = VersionUtils.getPreviousVersion(); - IndexMetaData metaData = newIndexMeta("index", Settings.settingsBuilder().put(IndexMetaData.SETTING_VERSION_CREATED, version).build()); + IndexMetaData metaData = newIndexMeta("index", Settings.settingsBuilder().put(IndexMetaData.SETTING_VERSION_CREATED, version) + .build()); IndexSettings settings = new IndexSettings(metaData, Settings.EMPTY); assertEquals(version, settings.getIndexVersionCreated()); assertEquals("_na_", settings.getUUID()); try { - settings.updateIndexMetaData(newIndexMeta("index", Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT).put("index.test.setting.int", 42).build())); + settings.updateIndexMetaData(newIndexMeta("index", Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, + Version.CURRENT).put("index.test.setting.int", 42).build())); fail("version has changed"); } catch (IllegalArgumentException ex) { assertTrue(ex.getMessage(), ex.getMessage().startsWith("version mismatch on settings update expected: ")); } - metaData = newIndexMeta("index", Settings.settingsBuilder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT).put(IndexMetaData.SETTING_INDEX_UUID, "0xdeadbeef").build()); + metaData = newIndexMeta("index", Settings.settingsBuilder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT) + .put(IndexMetaData.SETTING_INDEX_UUID, "0xdeadbeef").build()); settings = new IndexSettings(metaData, Settings.EMPTY); try { - settings.updateIndexMetaData(newIndexMeta("index", Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT).put("index.test.setting.int", 42).build())); + settings.updateIndexMetaData(newIndexMeta("index", Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, + 
Version.CURRENT).put("index.test.setting.int", 42).build())); fail("uuid missing/change"); } catch (IllegalArgumentException ex) { assertEquals("uuid mismatch on settings update expected: 0xdeadbeef but was: _na_", ex.getMessage()); @@ -118,7 +128,8 @@ public class IndexSettingsTests extends ESTestCase { if (settings.length > 0) { settingSet.addAll(Arrays.asList(settings)); } - return new IndexSettings(metaData, nodeSettings, (idx) -> Regex.simpleMatch(idx, metaData.getIndex().getName()), new IndexScopedSettings(Settings.EMPTY, settingSet)); + return new IndexSettings(metaData, nodeSettings, (idx) -> Regex.simpleMatch(idx, metaData.getIndex().getName()), + new IndexScopedSettings(Settings.EMPTY, settingSet)); } @@ -172,7 +183,8 @@ public class IndexSettingsTests extends ESTestCase { .build()); IndexSettings settings = new IndexSettings(metaData, Settings.EMPTY); assertEquals(Translog.Durability.ASYNC, settings.getTranslogDurability()); - settings.updateIndexMetaData(newIndexMeta("index", Settings.builder().put(IndexSettings.INDEX_TRANSLOG_DURABILITY_SETTING.getKey(), "request").build())); + settings.updateIndexMetaData(newIndexMeta("index", Settings.builder().put(IndexSettings.INDEX_TRANSLOG_DURABILITY_SETTING.getKey(), + "request").build())); assertEquals(Translog.Durability.REQUEST, settings.getTranslogDurability()); metaData = newIndexMeta("index", Settings.settingsBuilder() @@ -189,7 +201,8 @@ public class IndexSettingsTests extends ESTestCase { .build()); IndexSettings settings = new IndexSettings(metaData, Settings.EMPTY); assertFalse(settings.isWarmerEnabled()); - settings.updateIndexMetaData(newIndexMeta("index", Settings.builder().put(IndexSettings.INDEX_WARMER_ENABLED_SETTING.getKey(), "true").build())); + settings.updateIndexMetaData(newIndexMeta("index", Settings.builder().put(IndexSettings.INDEX_WARMER_ENABLED_SETTING.getKey(), + "true").build())); assertTrue(settings.isWarmerEnabled()); metaData = newIndexMeta("index", Settings.settingsBuilder() .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT) @@ -205,10 +218,13 @@ public class IndexSettingsTests extends ESTestCase { .put(IndexSettings.INDEX_REFRESH_INTERVAL_SETTING.getKey(), refreshInterval) .build()); IndexSettings settings = new IndexSettings(metaData, Settings.EMPTY); - assertEquals(TimeValue.parseTimeValue(refreshInterval, new TimeValue(1, TimeUnit.DAYS), IndexSettings.INDEX_REFRESH_INTERVAL_SETTING.getKey()), settings.getRefreshInterval()); + assertEquals(TimeValue.parseTimeValue(refreshInterval, new TimeValue(1, TimeUnit.DAYS), + IndexSettings.INDEX_REFRESH_INTERVAL_SETTING.getKey()), settings.getRefreshInterval()); String newRefreshInterval = getRandomTimeString(); - settings.updateIndexMetaData(newIndexMeta("index", Settings.builder().put(IndexSettings.INDEX_REFRESH_INTERVAL_SETTING.getKey(), newRefreshInterval).build())); - assertEquals(TimeValue.parseTimeValue(newRefreshInterval, new TimeValue(1, TimeUnit.DAYS), IndexSettings.INDEX_REFRESH_INTERVAL_SETTING.getKey()), settings.getRefreshInterval()); + settings.updateIndexMetaData(newIndexMeta("index", Settings.builder().put(IndexSettings.INDEX_REFRESH_INTERVAL_SETTING.getKey(), + newRefreshInterval).build())); + assertEquals(TimeValue.parseTimeValue(newRefreshInterval, new TimeValue(1, TimeUnit.DAYS), + IndexSettings.INDEX_REFRESH_INTERVAL_SETTING.getKey()), settings.getRefreshInterval()); } private String getRandomTimeString() { @@ -227,7 +243,8 @@ public class IndexSettingsTests extends ESTestCase { .build()); IndexSettings settings = new 
IndexSettings(metaData, Settings.EMPTY); assertEquals(15, settings.getMaxResultWindow()); - settings.updateIndexMetaData(newIndexMeta("index", Settings.builder().put(IndexSettings.MAX_RESULT_WINDOW_SETTING.getKey(), 42).build())); + settings.updateIndexMetaData(newIndexMeta("index", Settings.builder().put(IndexSettings.MAX_RESULT_WINDOW_SETTING.getKey(), + 42).build())); assertEquals(42, settings.getMaxResultWindow()); settings.updateIndexMetaData(newIndexMeta("index", Settings.EMPTY)); assertEquals(IndexSettings.MAX_RESULT_WINDOW_SETTING.get(Settings.EMPTY).intValue(), settings.getMaxResultWindow()); @@ -246,11 +263,15 @@ public class IndexSettingsTests extends ESTestCase { .put(IndexSettings.INDEX_GC_DELETES_SETTING.getKey(), gcDeleteSetting.getStringRep()) .build()); IndexSettings settings = new IndexSettings(metaData, Settings.EMPTY); - assertEquals(TimeValue.parseTimeValue(gcDeleteSetting.getStringRep(), new TimeValue(1, TimeUnit.DAYS), IndexSettings.INDEX_GC_DELETES_SETTING.getKey()).getMillis(), settings.getGcDeletesInMillis()); + assertEquals(TimeValue.parseTimeValue(gcDeleteSetting.getStringRep(), new TimeValue(1, TimeUnit.DAYS), + IndexSettings.INDEX_GC_DELETES_SETTING.getKey()).getMillis(), settings.getGcDeletesInMillis()); TimeValue newGCDeleteSetting = new TimeValue(Math.abs(randomInt()), TimeUnit.MILLISECONDS); - settings.updateIndexMetaData(newIndexMeta("index", Settings.builder().put(IndexSettings.INDEX_GC_DELETES_SETTING.getKey(), newGCDeleteSetting.getStringRep()).build())); - assertEquals(TimeValue.parseTimeValue(newGCDeleteSetting.getStringRep(), new TimeValue(1, TimeUnit.DAYS), IndexSettings.INDEX_GC_DELETES_SETTING.getKey()).getMillis(), settings.getGcDeletesInMillis()); - settings.updateIndexMetaData(newIndexMeta("index", Settings.builder().put(IndexSettings.INDEX_GC_DELETES_SETTING.getKey(), randomBoolean() ? -1 : new TimeValue(-1, TimeUnit.MILLISECONDS)).build())); + settings.updateIndexMetaData(newIndexMeta("index", Settings.builder().put(IndexSettings.INDEX_GC_DELETES_SETTING.getKey(), + newGCDeleteSetting.getStringRep()).build())); + assertEquals(TimeValue.parseTimeValue(newGCDeleteSetting.getStringRep(), new TimeValue(1, TimeUnit.DAYS), + IndexSettings.INDEX_GC_DELETES_SETTING.getKey()).getMillis(), settings.getGcDeletesInMillis()); + settings.updateIndexMetaData(newIndexMeta("index", Settings.builder().put(IndexSettings.INDEX_GC_DELETES_SETTING.getKey(), + randomBoolean() ? 
-1 : new TimeValue(-1, TimeUnit.MILLISECONDS)).build())); assertEquals(-1, settings.getGcDeletesInMillis()); } @@ -261,7 +282,8 @@ public class IndexSettingsTests extends ESTestCase { .build()); IndexSettings settings = new IndexSettings(metaData, Settings.EMPTY); assertFalse(settings.isTTLPurgeDisabled()); - settings.updateIndexMetaData(newIndexMeta("index", Settings.builder().put(IndexSettings.INDEX_TTL_DISABLE_PURGE_SETTING.getKey(), "true").build())); + settings.updateIndexMetaData(newIndexMeta("index", Settings.builder().put(IndexSettings.INDEX_TTL_DISABLE_PURGE_SETTING.getKey(), + "true").build())); assertTrue(settings.isTTLPurgeDisabled()); settings.updateIndexMetaData(newIndexMeta("index", Settings.EMPTY)); @@ -276,7 +298,8 @@ public class IndexSettingsTests extends ESTestCase { public void testTranslogFlushSizeThreshold() { ByteSizeValue translogFlushThresholdSize = new ByteSizeValue(Math.abs(randomInt())); - ByteSizeValue actualValue = ByteSizeValue.parseBytesSizeValue(translogFlushThresholdSize.toString(), IndexSettings.INDEX_TRANSLOG_FLUSH_THRESHOLD_SIZE_SETTING.getKey()); + ByteSizeValue actualValue = ByteSizeValue.parseBytesSizeValue(translogFlushThresholdSize.toString(), + IndexSettings.INDEX_TRANSLOG_FLUSH_THRESHOLD_SIZE_SETTING.getKey()); IndexMetaData metaData = newIndexMeta("index", Settings.settingsBuilder() .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT) .put(IndexSettings.INDEX_TRANSLOG_FLUSH_THRESHOLD_SIZE_SETTING.getKey(), translogFlushThresholdSize.toString()) @@ -284,8 +307,33 @@ public class IndexSettingsTests extends ESTestCase { IndexSettings settings = new IndexSettings(metaData, Settings.EMPTY); assertEquals(actualValue, settings.getFlushThresholdSize()); ByteSizeValue newTranslogFlushThresholdSize = new ByteSizeValue(Math.abs(randomInt())); - ByteSizeValue actualNewTranslogFlushThresholdSize = ByteSizeValue.parseBytesSizeValue(newTranslogFlushThresholdSize.toString(), IndexSettings.INDEX_TRANSLOG_FLUSH_THRESHOLD_SIZE_SETTING.getKey()); - settings.updateIndexMetaData(newIndexMeta("index", Settings.builder().put(IndexSettings.INDEX_TRANSLOG_FLUSH_THRESHOLD_SIZE_SETTING.getKey(), newTranslogFlushThresholdSize.toString()).build())); + ByteSizeValue actualNewTranslogFlushThresholdSize = ByteSizeValue.parseBytesSizeValue(newTranslogFlushThresholdSize.toString(), + IndexSettings.INDEX_TRANSLOG_FLUSH_THRESHOLD_SIZE_SETTING.getKey()); + settings.updateIndexMetaData(newIndexMeta("index", Settings.builder() + .put(IndexSettings.INDEX_TRANSLOG_FLUSH_THRESHOLD_SIZE_SETTING.getKey(), newTranslogFlushThresholdSize.toString()).build())); assertEquals(actualNewTranslogFlushThresholdSize, settings.getFlushThresholdSize()); } + + + public void testArchiveBrokenIndexSettings() { + Settings settings = IndexScopedSettings.DEFAULT_SCOPED_SETTINGS.archiveUnknownOrBrokenSettings(Settings.EMPTY); + assertSame(settings, Settings.EMPTY); + settings = IndexScopedSettings.DEFAULT_SCOPED_SETTINGS.archiveUnknownOrBrokenSettings(Settings.builder() + .put("index.refresh_interval", "-200").build()); + assertEquals("-200", settings.get("archived.index.refresh_interval")); + assertNull(settings.get("index.refresh_interval")); + + Settings prevSettings = settings; // no double archive + settings = IndexScopedSettings.DEFAULT_SCOPED_SETTINGS.archiveUnknownOrBrokenSettings(prevSettings); + assertSame(prevSettings, settings); + + settings = IndexScopedSettings.DEFAULT_SCOPED_SETTINGS.archiveUnknownOrBrokenSettings(Settings.builder() + .put("index.version.created", Version.CURRENT.id) 
// private setting + .put("index.unknown", "foo") + .put("index.refresh_interval", "2s").build()); + + assertEquals("foo", settings.get("archived.index.unknown")); + assertEquals(Integer.toString(Version.CURRENT.id), settings.get("index.version.created")); + assertEquals("2s", settings.get("index.refresh_interval")); + } } diff --git a/core/src/test/java/org/elasticsearch/index/analysis/AnalysisTests.java b/core/src/test/java/org/elasticsearch/index/analysis/AnalysisTests.java index 061e0d9d29f..37943773cef 100644 --- a/core/src/test/java/org/elasticsearch/index/analysis/AnalysisTests.java +++ b/core/src/test/java/org/elasticsearch/index/analysis/AnalysisTests.java @@ -21,8 +21,23 @@ package org.elasticsearch.index.analysis; import org.apache.lucene.analysis.util.CharArraySet; import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.env.Environment; import org.elasticsearch.test.ESTestCase; +import java.io.BufferedWriter; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.OutputStream; +import java.nio.charset.CharacterCodingException; +import java.nio.charset.Charset; +import java.nio.charset.MalformedInputException; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.NoSuchFileException; +import java.nio.file.Path; +import java.util.Arrays; +import java.util.List; + import static org.elasticsearch.common.settings.Settings.settingsBuilder; import static org.hamcrest.Matchers.is; @@ -42,4 +57,55 @@ public class AnalysisTests extends ESTestCase { assertThat(set.contains("bar"), is(true)); assertThat(set.contains("baz"), is(false)); } + + public void testParseNonExistingFile() { + Path tempDir = createTempDir(); + Settings nodeSettings = Settings.builder() + .put("foo.bar_path", tempDir.resolve("foo.dict")) + .put(Environment.PATH_HOME_SETTING.getKey(), tempDir).build(); + Environment env = new Environment(nodeSettings); + IllegalArgumentException ex = expectThrows(IllegalArgumentException.class, + () -> Analysis.getWordList(env, nodeSettings, "foo.bar")); + assertEquals("IOException while reading foo.bar_path: " + tempDir.resolve("foo.dict").toString(), ex.getMessage()); + assertTrue(ex.getCause().toString(), ex.getCause() instanceof FileNotFoundException + || ex.getCause() instanceof NoSuchFileException); + } + + + public void testParseFalseEncodedFile() throws IOException { + Path tempDir = createTempDir(); + Path dict = tempDir.resolve("foo.dict"); + Settings nodeSettings = Settings.builder() + .put("foo.bar_path", dict) + .put(Environment.PATH_HOME_SETTING.getKey(), tempDir).build(); + try (OutputStream writer = Files.newOutputStream(dict)) { + writer.write(new byte[]{(byte) 0xff, 0x00, 0x00}); // some invalid UTF-8 + writer.write('\n'); + } + Environment env = new Environment(nodeSettings); + IllegalArgumentException ex = expectThrows(IllegalArgumentException.class, + () -> Analysis.getWordList(env, nodeSettings, "foo.bar")); + assertEquals("Unsupported character encoding detected while reading foo.bar_path: " + tempDir.resolve("foo.dict").toString() + + " - files must be UTF-8 encoded" , ex.getMessage()); + assertTrue(ex.getCause().toString(), ex.getCause() instanceof MalformedInputException + || ex.getCause() instanceof CharacterCodingException); + } + + public void testParseWordList() throws IOException { + Path tempDir = createTempDir(); + Path dict = tempDir.resolve("foo.dict"); + Settings nodeSettings = Settings.builder() + .put("foo.bar_path", dict) + 
.put(Environment.PATH_HOME_SETTING.getKey(), tempDir).build(); + try (BufferedWriter writer = Files.newBufferedWriter(dict, StandardCharsets.UTF_8)) { + writer.write("hello"); + writer.write('\n'); + writer.write("world"); + writer.write('\n'); + } + Environment env = new Environment(nodeSettings); + List<String> wordList = Analysis.getWordList(env, nodeSettings, "foo.bar"); + assertEquals(Arrays.asList("hello", "world"), wordList); + + } } diff --git a/core/src/test/java/org/elasticsearch/index/mapper/DynamicTemplateTests.java b/core/src/test/java/org/elasticsearch/index/mapper/DynamicTemplateTests.java new file mode 100644 index 00000000000..8ee8332b708 --- /dev/null +++ b/core/src/test/java/org/elasticsearch/index/mapper/DynamicTemplateTests.java @@ -0,0 +1,93 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.index.mapper; + +import org.elasticsearch.Version; +import org.elasticsearch.common.xcontent.ToXContent; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.common.xcontent.json.JsonXContent; +import org.elasticsearch.index.mapper.object.DynamicTemplate; +import org.elasticsearch.test.ESTestCase; + +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; + +public class DynamicTemplateTests extends ESTestCase { + + public void testParseUnknownParam() throws Exception { + Map<String, Object> templateDef = new HashMap<>(); + templateDef.put("match_mapping_type", "string"); + templateDef.put("mapping", Collections.singletonMap("store", true)); + templateDef.put("random_param", "random_value"); + + IllegalArgumentException e = expectThrows(IllegalArgumentException.class, + () -> DynamicTemplate.parse("my_template", templateDef, Version.V_5_0_0)); + assertEquals("Illegal dynamic template parameter: [random_param]", e.getMessage()); + + // but no issues on 2.x for bw compat + DynamicTemplate template = DynamicTemplate.parse("my_template", templateDef, Version.V_2_3_0); + XContentBuilder builder = JsonXContent.contentBuilder(); + template.toXContent(builder, ToXContent.EMPTY_PARAMS); + assertEquals("{\"match_mapping_type\":\"string\",\"mapping\":{\"store\":true}}", builder.string()); + } + + public void testSerialization() throws Exception { + // type-based template + Map<String, Object> templateDef = new HashMap<>(); + templateDef.put("match_mapping_type", "string"); + templateDef.put("mapping", Collections.singletonMap("store", true)); + DynamicTemplate template = DynamicTemplate.parse("my_template", templateDef, Version.V_5_0_0); + XContentBuilder builder = JsonXContent.contentBuilder(); + template.toXContent(builder, ToXContent.EMPTY_PARAMS); + assertEquals("{\"match_mapping_type\":\"string\",\"mapping\":{\"store\":true}}", builder.string()); + + // name-based template + templateDef = new 
HashMap<>(); + templateDef.put("match", "*name"); + templateDef.put("unmatch", "first_name"); + templateDef.put("mapping", Collections.singletonMap("store", true)); + template = DynamicTemplate.parse("my_template", templateDef, Version.V_5_0_0); + builder = JsonXContent.contentBuilder(); + template.toXContent(builder, ToXContent.EMPTY_PARAMS); + assertEquals("{\"match\":\"*name\",\"unmatch\":\"first_name\",\"mapping\":{\"store\":true}}", builder.string()); + + // path-based template + templateDef = new HashMap<>(); + templateDef.put("path_match", "*name"); + templateDef.put("path_unmatch", "first_name"); + templateDef.put("mapping", Collections.singletonMap("store", true)); + template = DynamicTemplate.parse("my_template", templateDef, Version.V_5_0_0); + builder = JsonXContent.contentBuilder(); + template.toXContent(builder, ToXContent.EMPTY_PARAMS); + assertEquals("{\"path_match\":\"*name\",\"path_unmatch\":\"first_name\",\"mapping\":{\"store\":true}}", + builder.string()); + + // regex matching + templateDef = new HashMap<>(); + templateDef.put("match", "^a$"); + templateDef.put("match_pattern", "regex"); + templateDef.put("mapping", Collections.singletonMap("store", true)); + template = DynamicTemplate.parse("my_template", templateDef, Version.V_5_0_0); + builder = JsonXContent.contentBuilder(); + template.toXContent(builder, ToXContent.EMPTY_PARAMS); + assertEquals("{\"match\":\"^a$\",\"match_pattern\":\"regex\",\"mapping\":{\"store\":true}}", builder.string()); + } +} diff --git a/core/src/test/java/org/elasticsearch/index/mapper/internal/FieldNamesFieldTypeTests.java b/core/src/test/java/org/elasticsearch/index/mapper/internal/FieldNamesFieldTypeTests.java index fd0c344c26b..f6ed0cf931d 100644 --- a/core/src/test/java/org/elasticsearch/index/mapper/internal/FieldNamesFieldTypeTests.java +++ b/core/src/test/java/org/elasticsearch/index/mapper/internal/FieldNamesFieldTypeTests.java @@ -18,6 +18,9 @@ */ package org.elasticsearch.index.mapper.internal; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermQuery; import org.elasticsearch.index.mapper.FieldTypeTestCase; import org.elasticsearch.index.mapper.MappedFieldType; import org.junit.Before; @@ -38,4 +41,15 @@ public class FieldNamesFieldTypeTests extends FieldTypeTestCase { } }); } + + public void testTermQuery() { + FieldNamesFieldMapper.FieldNamesFieldType type = new FieldNamesFieldMapper.FieldNamesFieldType(); + type.setName(FieldNamesFieldMapper.CONTENT_TYPE); + type.setEnabled(true); + Query termQuery = type.termQuery("field_name", null); + assertEquals(new TermQuery(new Term(FieldNamesFieldMapper.CONTENT_TYPE, "field_name")), termQuery); + type.setEnabled(false); + IllegalStateException e = expectThrows(IllegalStateException.class, () -> type.termQuery("field_name", null)); + assertEquals("Cannot run [exists] queries if the [_field_names] field is disabled", e.getMessage()); + } } diff --git a/core/src/test/java/org/elasticsearch/index/query/ExistsQueryBuilderTests.java b/core/src/test/java/org/elasticsearch/index/query/ExistsQueryBuilderTests.java index 0dc3a5ff90e..69fd843ad42 100644 --- a/core/src/test/java/org/elasticsearch/index/query/ExistsQueryBuilderTests.java +++ b/core/src/test/java/org/elasticsearch/index/query/ExistsQueryBuilderTests.java @@ -24,7 +24,6 @@ import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.ConstantScoreQuery; import org.apache.lucene.search.Query; import org.elasticsearch.cluster.metadata.MetaData; -import 
org.elasticsearch.index.mapper.object.ObjectMapper; import java.io.IOException; import java.util.Collection; @@ -55,13 +54,8 @@ public class ExistsQueryBuilderTests extends AbstractQueryTestCase Collection<String> fields = context.simpleMatchToIndexNames(fieldPattern); - if (getCurrentTypes().length == 0 || fields.size() == 0) { + if (getCurrentTypes().length == 0) { assertThat(query, instanceOf(BooleanQuery.class)); BooleanQuery booleanQuery = (BooleanQuery) query; assertThat(booleanQuery.clauses().size(), equalTo(0)); diff --git a/core/src/test/java/org/elasticsearch/index/query/RangeQueryBuilderTests.java b/core/src/test/java/org/elasticsearch/index/query/RangeQueryBuilderTests.java index 30e32c92da2..eb373539994 100644 --- a/core/src/test/java/org/elasticsearch/index/query/RangeQueryBuilderTests.java +++ b/core/src/test/java/org/elasticsearch/index/query/RangeQueryBuilderTests.java @@ -430,10 +428,8 @@ public class RangeQueryBuilderTests extends AbstractQueryTestCase + FieldStatsProvider fieldStatsProvider = new FieldStatsProvider(null, null) { + @Override + public <T extends Comparable<T>> FieldStats<T> get(String field) throws IOException { + assertThat(field, equalTo(fieldName)); + return (FieldStats<T>) new FieldStats.Date(randomLong(), randomLong(), randomLong(), randomLong(), + shardMinValue.getMillis(), shardMaxValue.getMillis(), null); + } + }; + queryShardContext.setFieldStatsProvider(fieldStatsProvider); + QueryBuilder<?> rewritten = query.rewrite(queryShardContext); + assertThat(rewritten, instanceOf(RangeQueryBuilder.class)); + RangeQueryBuilder rewrittenRange = (RangeQueryBuilder) rewritten; + assertThat(rewrittenRange.fieldName(), equalTo(fieldName)); + assertThat(rewrittenRange.from(), equalTo(null)); + assertThat(rewrittenRange.to(), equalTo(null)); } public void testRewriteDateToMatchNone() throws IOException { @@ -773,6 +798,27 @@ public class RangeQueryBuilderTests extends AbstractQueryTestCase + QueryBuilder<?> rewritten = query.rewrite(queryShardContext); + assertThat(rewritten, instanceOf(MatchNoneQueryBuilder.class)); + } + public void testRewriteDateToSame() throws IOException { String fieldName = randomAsciiOfLengthBetween(1, 20); RangeQueryBuilder query = new RangeQueryBuilder(fieldName); @@ -793,4 +839,25 @@ public class RangeQueryBuilderTests extends AbstractQueryTestCase QueryBuilder<?> rewritten = query.rewrite(queryShardContext); assertThat(rewritten, sameInstance(query)); } + + public void testRewriteDateWithNowToSame() throws IOException { + String fieldName = randomAsciiOfLengthBetween(1, 20); + RangeQueryBuilder query = new RangeQueryBuilder(fieldName); + String queryFromValue = "now-2d"; + String queryToValue = "now"; + query.from(queryFromValue); + query.to(queryToValue); + QueryShardContext queryShardContext = queryShardContext(); + FieldStatsProvider fieldStatsProvider = new FieldStatsProvider(null, null) { + + @Override + public Relation isFieldWithinQuery(String fieldName, Object from, Object to, boolean includeLower, boolean includeUpper, + DateTimeZone timeZone, DateMathParser dateMathParser) throws IOException { + return Relation.INTERSECTS; + } + }; + queryShardContext.setFieldStatsProvider(fieldStatsProvider); + QueryBuilder<?> rewritten = query.rewrite(queryShardContext); + assertThat(rewritten, sameInstance(query)); + } } diff --git a/core/src/test/java/org/elasticsearch/indices/IndicesRequestCacheIT.java b/core/src/test/java/org/elasticsearch/indices/IndicesRequestCacheIT.java index 94c41e5c84e..fec50cf0a27 100644 --- a/core/src/test/java/org/elasticsearch/indices/IndicesRequestCacheIT.java +++ b/core/src/test/java/org/elasticsearch/indices/IndicesRequestCacheIT.java @@ -27,7 +27,10 @@ import 
org.elasticsearch.search.aggregations.bucket.histogram.DateHistogramInter import org.elasticsearch.search.aggregations.bucket.histogram.Histogram; import org.elasticsearch.search.aggregations.bucket.histogram.Histogram.Bucket; import org.elasticsearch.test.ESIntegTestCase; +import org.joda.time.DateTime; import org.joda.time.DateTimeZone; +import org.joda.time.chrono.ISOChronology; + import java.util.List; import static org.elasticsearch.search.aggregations.AggregationBuilders.dateHistogram; @@ -233,4 +236,122 @@ public class IndicesRequestCacheIT extends ESIntegTestCase { equalTo(1L)); } + public void testQueryRewriteDatesWithNow() throws Exception { + assertAcked(client().admin().indices().prepareCreate("index-1").addMapping("type", "d", "type=date") + .setSettings(IndicesRequestCache.INDEX_CACHE_REQUEST_ENABLED_SETTING.getKey(), true, IndexMetaData.SETTING_NUMBER_OF_SHARDS, + 1, IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 0) + .get()); + assertAcked(client().admin().indices().prepareCreate("index-2").addMapping("type", "d", "type=date") + .setSettings(IndicesRequestCache.INDEX_CACHE_REQUEST_ENABLED_SETTING.getKey(), true, IndexMetaData.SETTING_NUMBER_OF_SHARDS, + 1, IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 0) + .get()); + assertAcked(client().admin().indices().prepareCreate("index-3").addMapping("type", "d", "type=date") + .setSettings(IndicesRequestCache.INDEX_CACHE_REQUEST_ENABLED_SETTING.getKey(), true, IndexMetaData.SETTING_NUMBER_OF_SHARDS, + 1, IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 0) + .get()); + DateTime now = new DateTime(ISOChronology.getInstanceUTC()); + indexRandom(true, client().prepareIndex("index-1", "type", "1").setSource("d", now), + client().prepareIndex("index-1", "type", "2").setSource("d", now.minusDays(1)), + client().prepareIndex("index-1", "type", "3").setSource("d", now.minusDays(2)), + client().prepareIndex("index-2", "type", "4").setSource("d", now.minusDays(3)), + client().prepareIndex("index-2", "type", "5").setSource("d", now.minusDays(4)), + client().prepareIndex("index-2", "type", "6").setSource("d", now.minusDays(5)), + client().prepareIndex("index-3", "type", "7").setSource("d", now.minusDays(6)), + client().prepareIndex("index-3", "type", "8").setSource("d", now.minusDays(7)), + client().prepareIndex("index-3", "type", "9").setSource("d", now.minusDays(8))); + ensureSearchable("index-1", "index-2", "index-3"); + + assertThat( + client().admin().indices().prepareStats("index-1").setRequestCache(true).get().getTotal().getRequestCache().getHitCount(), + equalTo(0L)); + assertThat( + client().admin().indices().prepareStats("index-1").setRequestCache(true).get().getTotal().getRequestCache().getMissCount(), + equalTo(0L)); + + assertThat( + client().admin().indices().prepareStats("index-2").setRequestCache(true).get().getTotal().getRequestCache().getHitCount(), + equalTo(0L)); + assertThat( + client().admin().indices().prepareStats("index-2").setRequestCache(true).get().getTotal().getRequestCache().getMissCount(), + equalTo(0L)); + assertThat( + client().admin().indices().prepareStats("index-3").setRequestCache(true).get().getTotal().getRequestCache().getHitCount(), + equalTo(0L)); + assertThat( + client().admin().indices().prepareStats("index-3").setRequestCache(true).get().getTotal().getRequestCache().getMissCount(), + equalTo(0L)); + + final SearchResponse r1 = client().prepareSearch("index-*").setSearchType(SearchType.QUERY_THEN_FETCH).setSize(0) + .setQuery(QueryBuilders.rangeQuery("d").gte("now-7d/d").lte("now")).get(); + assertSearchResponse(r1); + 
assertThat(r1.getHits().getTotalHits(), equalTo(8L)); + assertThat( + client().admin().indices().prepareStats("index-1").setRequestCache(true).get().getTotal().getRequestCache().getHitCount(), + equalTo(0L)); + assertThat( + client().admin().indices().prepareStats("index-1").setRequestCache(true).get().getTotal().getRequestCache().getMissCount(), + equalTo(1L)); + assertThat( + client().admin().indices().prepareStats("index-2").setRequestCache(true).get().getTotal().getRequestCache().getHitCount(), + equalTo(0L)); + assertThat( + client().admin().indices().prepareStats("index-2").setRequestCache(true).get().getTotal().getRequestCache().getMissCount(), + equalTo(1L)); + // Because the query will INTERSECT with the 3rd index it will not be + // rewritten and will still contain `now` so won't be recorded as a + // cache miss or cache hit since queries containing now can't be cached + assertThat( + client().admin().indices().prepareStats("index-3").setRequestCache(true).get().getTotal().getRequestCache().getHitCount(), + equalTo(0L)); + assertThat( + client().admin().indices().prepareStats("index-3").setRequestCache(true).get().getTotal().getRequestCache().getMissCount(), + equalTo(0L)); + + final SearchResponse r2 = client().prepareSearch("index-*").setSearchType(SearchType.QUERY_THEN_FETCH).setSize(0) + .setQuery(QueryBuilders.rangeQuery("d").gte("now-7d/d").lte("now")).get(); + assertSearchResponse(r2); + assertThat(r2.getHits().getTotalHits(), equalTo(8L)); + assertThat( + client().admin().indices().prepareStats("index-1").setRequestCache(true).get().getTotal().getRequestCache().getHitCount(), + equalTo(1L)); + assertThat( + client().admin().indices().prepareStats("index-1").setRequestCache(true).get().getTotal().getRequestCache().getMissCount(), + equalTo(1L)); + assertThat( + client().admin().indices().prepareStats("index-2").setRequestCache(true).get().getTotal().getRequestCache().getHitCount(), + equalTo(1L)); + assertThat( + client().admin().indices().prepareStats("index-2").setRequestCache(true).get().getTotal().getRequestCache().getMissCount(), + equalTo(1L)); + assertThat( + client().admin().indices().prepareStats("index-3").setRequestCache(true).get().getTotal().getRequestCache().getHitCount(), + equalTo(0L)); + assertThat( + client().admin().indices().prepareStats("index-3").setRequestCache(true).get().getTotal().getRequestCache().getMissCount(), + equalTo(0L)); + + final SearchResponse r3 = client().prepareSearch("index-*").setSearchType(SearchType.QUERY_THEN_FETCH).setSize(0) + .setQuery(QueryBuilders.rangeQuery("d").gte("now-7d/d").lte("now")).get(); + assertSearchResponse(r3); + assertThat(r3.getHits().getTotalHits(), equalTo(8L)); + assertThat( + client().admin().indices().prepareStats("index-1").setRequestCache(true).get().getTotal().getRequestCache().getHitCount(), + equalTo(2L)); + assertThat( + client().admin().indices().prepareStats("index-1").setRequestCache(true).get().getTotal().getRequestCache().getMissCount(), + equalTo(1L)); + assertThat( + client().admin().indices().prepareStats("index-2").setRequestCache(true).get().getTotal().getRequestCache().getHitCount(), + equalTo(2L)); + assertThat( + client().admin().indices().prepareStats("index-2").setRequestCache(true).get().getTotal().getRequestCache().getMissCount(), + equalTo(1L)); + assertThat( + client().admin().indices().prepareStats("index-3").setRequestCache(true).get().getTotal().getRequestCache().getHitCount(), + equalTo(0L)); + assertThat( + 
client().admin().indices().prepareStats("index-3").setRequestCache(true).get().getTotal().getRequestCache().getMissCount(), + equalTo(0L)); + } + } diff --git a/core/src/test/java/org/elasticsearch/recovery/RecoveryWhileUnderLoadIT.java b/core/src/test/java/org/elasticsearch/recovery/RecoveryWhileUnderLoadIT.java index 1015e236d42..0afc0f53a32 100644 --- a/core/src/test/java/org/elasticsearch/recovery/RecoveryWhileUnderLoadIT.java +++ b/core/src/test/java/org/elasticsearch/recovery/RecoveryWhileUnderLoadIT.java @@ -22,8 +22,12 @@ package org.elasticsearch.recovery; import org.elasticsearch.action.admin.indices.refresh.RefreshResponse; import org.elasticsearch.action.admin.indices.stats.IndicesStatsResponse; import org.elasticsearch.action.admin.indices.stats.ShardStats; +import org.elasticsearch.action.get.GetResponse; import org.elasticsearch.action.search.SearchResponse; import org.elasticsearch.cluster.routing.Murmur3HashFunction; +import org.elasticsearch.cluster.service.ClusterService; +import org.elasticsearch.cluster.ClusterState; +import org.elasticsearch.cluster.routing.ShardRouting; import org.elasticsearch.common.Priority; import org.elasticsearch.common.logging.ESLogger; import org.elasticsearch.common.logging.Loggers; @@ -31,6 +35,7 @@ import org.elasticsearch.common.math.MathUtils; import org.elasticsearch.common.unit.TimeValue; import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.shard.DocsStats; +import org.elasticsearch.index.shard.ShardId; import org.elasticsearch.index.translog.Translog; import org.elasticsearch.search.SearchHit; import org.elasticsearch.search.sort.SortOrder; @@ -50,7 +55,7 @@ import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAllS import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoTimeout; -@TestLogging("_root:DEBUG") +@TestLogging("_root:DEBUG,index.shard:TRACE") public class RecoveryWhileUnderLoadIT extends ESIntegTestCase { private final ESLogger logger = Loggers.getLogger(RecoveryWhileUnderLoadIT.class); @@ -270,14 +275,12 @@ public class RecoveryWhileUnderLoadIT extends ESIntegTestCase { private void iterateAssertCount(final int numberOfShards, final long numberOfDocs, final int iterations) throws Exception { SearchResponse[] iterationResults = new SearchResponse[iterations]; boolean error = false; - SearchResponse lastErroneousResponse = null; for (int i = 0; i < iterations; i++) { SearchResponse searchResponse = client().prepareSearch().setSize((int) numberOfDocs).setQuery(matchAllQuery()).addSort("id", SortOrder.ASC).get(); logSearchResponse(numberOfShards, numberOfDocs, i, searchResponse); iterationResults[i] = searchResponse; if (searchResponse.getHits().totalHits() != numberOfDocs) { error = true; - lastErroneousResponse = searchResponse; } } @@ -289,12 +292,21 @@ public class RecoveryWhileUnderLoadIT extends ESIntegTestCase { logger.info("shard [{}] - count {}, primary {}", shardStats.getShardRouting().id(), docsStats.getCount(), shardStats.getShardRouting().primary()); } - - for (int doc = 1, hit = 0; hit < lastErroneousResponse.getHits().getHits().length; hit++, doc++) { - SearchHit searchHit = lastErroneousResponse.getHits().getAt(hit); - while (doc < Integer.parseInt(searchHit.id())) { - logger.info("missing doc [{}], indexed to shard [{}]", doc, MathUtils.mod(Murmur3HashFunction.hash(Integer.toString(doc)), numberOfShards)); - doc++; + ClusterService clusterService = 
clusterService(); + final ClusterState state = clusterService.state(); + for (int shard = 0; shard < numberOfShards; shard++) { + // the background indexer starts assigning ids at 1 + for (int id = 1; id <= numberOfDocs; id++) { + ShardId docShard = clusterService.operationRouting().shardId(state, "test", Long.toString(id), null); + if (docShard.id() == shard) { + for (ShardRouting shardRouting : state.routingTable().shardRoutingTable("test", shard)) { + GetResponse response = client().prepareGet("test", "type", Long.toString(id)) + .setPreference("_only_node:" + shardRouting.currentNodeId()).get(); + if (response.isExists()) { + logger.info("missing id [{}] on shard {}", id, shardRouting); + } + } + } } } diff --git a/core/src/test/java/org/elasticsearch/search/query/ExistsIT.java b/core/src/test/java/org/elasticsearch/search/query/ExistsIT.java index 6b226d2c569..54515c6a2b0 100644 --- a/core/src/test/java/org/elasticsearch/search/query/ExistsIT.java +++ b/core/src/test/java/org/elasticsearch/search/query/ExistsIT.java @@ -58,9 +58,6 @@ public class ExistsIT extends ESIntegTestCase { XContentBuilder mapping = XContentBuilder.builder(JsonXContent.jsonXContent) .startObject() .startObject("type") - .startObject(FieldNamesFieldMapper.NAME) - .field("enabled", randomBoolean()) - .endObject() .startObject("properties") .startObject("foo") .field("type", "text") @@ -89,10 +86,10 @@ public class ExistsIT extends ESIntegTestCase { .endObject(); assertAcked(client().admin().indices().prepareCreate("idx").addMapping("type", mapping)); - @SuppressWarnings("unchecked") Map<String, Object> barObject = new HashMap<>(); barObject.put("foo", "bar"); barObject.put("bar", singletonMap("bar", "foo")); + @SuppressWarnings("unchecked") final Map<String, Object>[] sources = new Map[] { // simple property singletonMap("foo", "bar"), diff --git a/core/src/test/java/org/elasticsearch/search/sort/AbstractSortTestCase.java b/core/src/test/java/org/elasticsearch/search/sort/AbstractSortTestCase.java index f7f9edbc0b2..84b23fb1449 100644 --- a/core/src/test/java/org/elasticsearch/search/sort/AbstractSortTestCase.java +++ b/core/src/test/java/org/elasticsearch/search/sort/AbstractSortTestCase.java @@ -19,6 +19,8 @@ package org.elasticsearch.search.sort; +import org.apache.lucene.search.SortField; +import org.apache.lucene.util.Accountable; import org.elasticsearch.common.io.stream.BytesStreamOutput; import org.elasticsearch.common.io.stream.NamedWriteableAwareStreamInput; import org.elasticsearch.common.io.stream.NamedWriteableRegistry; @@ -30,27 +32,75 @@ import org.elasticsearch.common.xcontent.XContentFactory; import org.elasticsearch.common.xcontent.XContentHelper; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.common.xcontent.XContentType; +import org.elasticsearch.env.Environment; +import org.elasticsearch.index.Index; +import org.elasticsearch.index.IndexSettings; +import org.elasticsearch.index.cache.bitset.BitsetFilterCache; +import org.elasticsearch.index.fielddata.IndexFieldDataService; +import org.elasticsearch.index.mapper.ContentPath; +import org.elasticsearch.index.mapper.MappedFieldType; +import org.elasticsearch.index.mapper.Mapper.BuilderContext; +import org.elasticsearch.index.mapper.core.DoubleFieldMapper.DoubleFieldType; +import org.elasticsearch.index.mapper.object.ObjectMapper; +import org.elasticsearch.index.mapper.object.ObjectMapper.Nested; import org.elasticsearch.index.query.QueryParseContext; +import org.elasticsearch.index.query.QueryShardContext; +import 
org.elasticsearch.index.shard.ShardId; +import org.elasticsearch.indices.fielddata.cache.IndicesFieldDataCache; import org.elasticsearch.indices.query.IndicesQueriesRegistry; +import org.elasticsearch.script.CompiledScript; +import org.elasticsearch.script.Script; +import org.elasticsearch.script.ScriptContext; +import org.elasticsearch.script.ScriptContextRegistry; +import org.elasticsearch.script.ScriptEngineRegistry; +import org.elasticsearch.script.ScriptService; +import org.elasticsearch.script.ScriptServiceTests.TestEngineService; +import org.elasticsearch.script.ScriptSettings; import org.elasticsearch.search.SearchModule; import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.test.IndexSettingsModule; +import org.elasticsearch.watcher.ResourceWatcherService; import org.junit.AfterClass; import org.junit.BeforeClass; import java.io.IOException; +import java.nio.file.Path; +import java.util.Collections; +import java.util.List; +import java.util.Map; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.not; -public abstract class AbstractSortTestCase<T extends SortBuilder<T>> extends ESTestCase { +public abstract class AbstractSortTestCase<T extends SortBuilder<T> & SortBuilderParser<T>> extends ESTestCase { protected static NamedWriteableRegistry namedWriteableRegistry; private static final int NUMBER_OF_TESTBUILDERS = 20; static IndicesQueriesRegistry indicesQueriesRegistry; + private static SortParseElement parseElement = new SortParseElement(); + private static ScriptService scriptService; @BeforeClass - public static void init() { + public static void init() throws IOException { + Path genericConfigFolder = createTempDir(); + Settings baseSettings = Settings.builder() + .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()) + .put(Environment.PATH_CONF_SETTING.getKey(), genericConfigFolder) + .build(); + Environment environment = new Environment(baseSettings); + ScriptContextRegistry scriptContextRegistry = new ScriptContextRegistry(Collections.emptyList()); + ScriptEngineRegistry scriptEngineRegistry = new ScriptEngineRegistry(Collections.singletonList(new ScriptEngineRegistry + .ScriptEngineRegistration(TestEngineService.class, TestEngineService.TYPES))); + ScriptSettings scriptSettings = new ScriptSettings(scriptEngineRegistry, scriptContextRegistry); + scriptService = new ScriptService(baseSettings, environment, Collections.singleton(new TestEngineService()), + new ResourceWatcherService(baseSettings, null), scriptEngineRegistry, scriptContextRegistry, scriptSettings) { + @Override + public CompiledScript compile(Script script, ScriptContext scriptContext, Map<String, String> params) { + return new CompiledScript(ScriptType.INLINE, "mockName", "test", script); + } + }; + namedWriteableRegistry = new NamedWriteableRegistry(); namedWriteableRegistry.registerPrototype(SortBuilder.class, GeoDistanceSortBuilder.PROTOTYPE); namedWriteableRegistry.registerPrototype(SortBuilder.class, ScoreSortBuilder.PROTOTYPE); @@ -97,13 +147,40 @@ public abstract class AbstractSortTestCase + List<SortField> sortFields = parseElement.parse(parser, mockShardContext); + assertEquals(1, sortFields.size()); + SortField sortFieldOldStyle = sortFields.get(0); + assertEquals(sortFieldOldStyle.getField(), sortField.getField()); + assertEquals(sortFieldOldStyle.getReverse(), sortField.getReverse()); + assertEquals(sortFieldOldStyle.getType(), sortField.getType()); + } + } + /** * Test serialization and deserialization of the test sort. 
*/ @@ -148,8 +225,50 @@ public abstract class AbstractSortTestCase + return new ObjectMapper.Builder<>(name).nested(Nested.newNested(false, false)).build(context); + } + }; + } + + /** + * Return a field type. We use {@link DoubleFieldType} by default since it is compatible with all sort modes. + * Tests that require a field type other than double can override this. + */ + protected MappedFieldType provideMappedFieldType(String name) { + DoubleFieldType doubleFieldType = new DoubleFieldType(); + doubleFieldType.setName(name); + doubleFieldType.setHasDocValues(true); + return doubleFieldType; + } + @SuppressWarnings("unchecked") - protected T copyItem(T original) throws IOException { + private T copyItem(T original) throws IOException { try (BytesStreamOutput output = new BytesStreamOutput()) { original.writeTo(output); try (StreamInput in = new NamedWriteableAwareStreamInput(StreamInput.wrap(output.bytes()), namedWriteableRegistry)) { diff --git a/core/src/test/java/org/elasticsearch/search/sort/FieldSortBuilderTests.java b/core/src/test/java/org/elasticsearch/search/sort/FieldSortBuilderTests.java index 025f7930165..d00b60e0c83 100644 --- a/core/src/test/java/org/elasticsearch/search/sort/FieldSortBuilderTests.java +++ b/core/src/test/java/org/elasticsearch/search/sort/FieldSortBuilderTests.java @@ -25,7 +25,7 @@ public class FieldSortBuilderTests extends AbstractSortTestCase diff --git a/core/src/test/java/org/elasticsearch/search/sort/GeoDistanceSortBuilderIT.java b/core/src/test/java/org/elasticsearch/search/sort/GeoDistanceSortBuilderIT.java + private static final String LOCATION_FIELD = "location"; protected Collection<Class<? extends Plugin>> nodePlugins() { return pluginList(InternalSettingsPlugin.class); @@ -69,7 +71,7 @@ public class GeoDistanceSortBuilderIT extends ESIntegTestCase { */ Version version = VersionUtils.randomVersionBetween(random(), Version.V_2_0_0, Version.CURRENT); Settings settings = Settings.settingsBuilder().put(IndexMetaData.SETTING_VERSION_CREATED, version).build(); - assertAcked(prepareCreate("index").setSettings(settings).addMapping("type", "location", "type=geo_point")); + assertAcked(prepareCreate("index").setSettings(settings).addMapping("type", LOCATION_FIELD, "type=geo_point")); XContentBuilder d1Builder = jsonBuilder(); GeoPoint[] d1Points = {new GeoPoint(3, 2), new GeoPoint(4, 1)}; createShuffeldJSONArray(d1Builder, d1Points); @@ -95,7 +97,7 @@ public class GeoDistanceSortBuilderIT extends ESIntegTestCase { SearchResponse searchResponse = client().prepareSearch() .setQuery(matchAllQuery()) - .addSort(new GeoDistanceSortBuilder("location", q).sortMode(SortMode.MIN).order(SortOrder.ASC).geoDistance(GeoDistance.PLANE).unit(DistanceUnit.KILOMETERS)) + .addSort(new GeoDistanceSortBuilder(LOCATION_FIELD, q).sortMode(SortMode.MIN).order(SortOrder.ASC).geoDistance(GeoDistance.PLANE).unit(DistanceUnit.KILOMETERS)) .execute().actionGet(); assertOrderedSearchHits(searchResponse, "d1", "d2"); assertThat((Double)searchResponse.getHits().getAt(0).getSortValues()[0], closeTo(GeoDistance.PLANE.calculate(2, 2, 3, 2, DistanceUnit.KILOMETERS), 0.01d)); @@ -103,7 +105,7 @@ public class GeoDistanceSortBuilderIT extends ESIntegTestCase { searchResponse = client().prepareSearch() .setQuery(matchAllQuery()) - .addSort(new GeoDistanceSortBuilder("location", q).sortMode(SortMode.MIN).order(SortOrder.DESC).geoDistance(GeoDistance.PLANE).unit(DistanceUnit.KILOMETERS)) + .addSort(new GeoDistanceSortBuilder(LOCATION_FIELD, q).sortMode(SortMode.MIN).order(SortOrder.DESC).geoDistance(GeoDistance.PLANE).unit(DistanceUnit.KILOMETERS)) .execute().actionGet(); assertOrderedSearchHits(searchResponse, "d2", "d1"); assertThat((Double)searchResponse.getHits().getAt(0).getSortValues()[0], closeTo(GeoDistance.PLANE.calculate(2, 1, 5, 1, DistanceUnit.KILOMETERS), 0.01d)); @@ -111,7 +113,7 @@ public 
class GeoDistanceSortBuilderIT extends ESIntegTestCase { searchResponse = client().prepareSearch() .setQuery(matchAllQuery()) - .addSort(new GeoDistanceSortBuilder("location", q).sortMode(SortMode.MAX).order(SortOrder.ASC).geoDistance(GeoDistance.PLANE).unit(DistanceUnit.KILOMETERS)) + .addSort(new GeoDistanceSortBuilder(LOCATION_FIELD, q).sortMode(SortMode.MAX).order(SortOrder.ASC).geoDistance(GeoDistance.PLANE).unit(DistanceUnit.KILOMETERS)) .execute().actionGet(); assertOrderedSearchHits(searchResponse, "d1", "d2"); assertThat((Double)searchResponse.getHits().getAt(0).getSortValues()[0], closeTo(GeoDistance.PLANE.calculate(2, 2, 4, 1, DistanceUnit.KILOMETERS), 0.01d)); @@ -119,18 +121,61 @@ public class GeoDistanceSortBuilderIT extends ESIntegTestCase { searchResponse = client().prepareSearch() .setQuery(matchAllQuery()) - .addSort(new GeoDistanceSortBuilder("location", q).sortMode(SortMode.MAX).order(SortOrder.DESC).geoDistance(GeoDistance.PLANE).unit(DistanceUnit.KILOMETERS)) + .addSort(new GeoDistanceSortBuilder(LOCATION_FIELD, q).sortMode(SortMode.MAX).order(SortOrder.DESC).geoDistance(GeoDistance.PLANE).unit(DistanceUnit.KILOMETERS)) .execute().actionGet(); assertOrderedSearchHits(searchResponse, "d2", "d1"); assertThat((Double)searchResponse.getHits().getAt(0).getSortValues()[0], closeTo(GeoDistance.PLANE.calculate(2, 1, 6, 2, DistanceUnit.KILOMETERS), 0.01d)); assertThat((Double)searchResponse.getHits().getAt(1).getSortValues()[0], closeTo(GeoDistance.PLANE.calculate(2, 2, 4, 1, DistanceUnit.KILOMETERS), 0.01d)); } + public void testSingleToManyAvgMedian() throws ExecutionException, InterruptedException, IOException { + /** + * q = (0, 0) + * + * d1 = (0, 1), (0, 4), (0, 10); so avg. distance is 5, median distance is 4 + * d2 = (0, 1), (0, 5), (0, 6); so avg. 
distance is 4, median distance is 5 + */ + Version version = VersionUtils.randomVersionBetween(random(), Version.V_2_0_0, Version.CURRENT); + Settings settings = Settings.settingsBuilder().put(IndexMetaData.SETTING_VERSION_CREATED, version).build(); + assertAcked(prepareCreate("index").setSettings(settings).addMapping("type", LOCATION_FIELD, "type=geo_point")); + XContentBuilder d1Builder = jsonBuilder(); + GeoPoint[] d1Points = {new GeoPoint(0, 1), new GeoPoint(0, 4), new GeoPoint(0, 10)}; + createShuffeldJSONArray(d1Builder, d1Points); + + XContentBuilder d2Builder = jsonBuilder(); + GeoPoint[] d2Points = {new GeoPoint(0, 1), new GeoPoint(0, 5), new GeoPoint(0, 6)}; + createShuffeldJSONArray(d2Builder, d2Points); + + logger.info("d1: {}", d1Builder); + logger.info("d2: {}", d2Builder); + indexRandom(true, + client().prepareIndex("index", "type", "d1").setSource(d1Builder), + client().prepareIndex("index", "type", "d2").setSource(d2Builder)); + ensureYellow(); + GeoPoint q = new GeoPoint(0,0); + + SearchResponse searchResponse = client().prepareSearch() + .setQuery(matchAllQuery()) + .addSort(new GeoDistanceSortBuilder(LOCATION_FIELD, q).sortMode(SortMode.AVG).order(SortOrder.ASC).geoDistance(GeoDistance.PLANE).unit(DistanceUnit.KILOMETERS)) + .execute().actionGet(); + assertOrderedSearchHits(searchResponse, "d2", "d1"); + assertThat((Double)searchResponse.getHits().getAt(0).getSortValues()[0], closeTo(GeoDistance.PLANE.calculate(0, 0, 0, 4, DistanceUnit.KILOMETERS), 0.01d)); + assertThat((Double)searchResponse.getHits().getAt(1).getSortValues()[0], closeTo(GeoDistance.PLANE.calculate(0, 0, 0, 5, DistanceUnit.KILOMETERS), 0.01d)); + + searchResponse = client().prepareSearch() + .setQuery(matchAllQuery()) + .addSort(new GeoDistanceSortBuilder(LOCATION_FIELD, q).sortMode(SortMode.MEDIAN).order(SortOrder.ASC).geoDistance(GeoDistance.PLANE).unit(DistanceUnit.KILOMETERS)) + .execute().actionGet(); + assertOrderedSearchHits(searchResponse, "d1", "d2"); + assertThat((Double)searchResponse.getHits().getAt(0).getSortValues()[0], closeTo(GeoDistance.PLANE.calculate(0, 0, 0, 4, DistanceUnit.KILOMETERS), 0.01d)); + assertThat((Double)searchResponse.getHits().getAt(1).getSortValues()[0], closeTo(GeoDistance.PLANE.calculate(0, 0, 0, 5, DistanceUnit.KILOMETERS), 0.01d)); + } + protected void createShuffeldJSONArray(XContentBuilder builder, GeoPoint[] pointsArray) throws IOException { List<GeoPoint> points = new ArrayList<>(); points.addAll(Arrays.asList(pointsArray)); builder.startObject(); - builder.startArray("location"); + builder.startArray(LOCATION_FIELD); int numPoints = points.size(); for (int i = 0; i < numPoints; i++) { builder.value(points.remove(randomInt(points.size() - 1))); @@ -154,7 +199,7 @@ public class GeoDistanceSortBuilderIT extends ESIntegTestCase { */ Version version = VersionUtils.randomVersionBetween(random(), Version.V_2_0_0, Version.CURRENT); Settings settings = Settings.settingsBuilder().put(IndexMetaData.SETTING_VERSION_CREATED, version).build(); - assertAcked(prepareCreate("index").setSettings(settings).addMapping("type", "location", "type=geo_point")); + assertAcked(prepareCreate("index").setSettings(settings).addMapping("type", LOCATION_FIELD, "type=geo_point")); XContentBuilder d1Builder = jsonBuilder(); GeoPoint[] d1Points = {new GeoPoint(2.5, 1), new GeoPoint(2.75, 2), new GeoPoint(3, 3), new GeoPoint(3.25, 4)}; createShuffeldJSONArray(d1Builder, d1Points); @@ -177,13 +222,13 @@ public class GeoDistanceSortBuilderIT extends ESIntegTestCase { int at = randomInt(3 - i); if 
(randomBoolean()) { if (geoDistanceSortBuilder == null) { - geoDistanceSortBuilder = new GeoDistanceSortBuilder("location", qHashes.get(at)); + geoDistanceSortBuilder = new GeoDistanceSortBuilder(LOCATION_FIELD, qHashes.get(at)); } else { geoDistanceSortBuilder.geohashes(qHashes.get(at)); } } else { if (geoDistanceSortBuilder == null) { - geoDistanceSortBuilder = new GeoDistanceSortBuilder("location", qPoints.get(at)); + geoDistanceSortBuilder = new GeoDistanceSortBuilder(LOCATION_FIELD, qPoints.get(at)); } else { geoDistanceSortBuilder.points(qPoints.get(at)); } @@ -211,15 +256,15 @@ public class GeoDistanceSortBuilderIT extends ESIntegTestCase { } public void testSinglePointGeoDistanceSort() throws ExecutionException, InterruptedException, IOException { - assertAcked(prepareCreate("index").addMapping("type", "location", "type=geo_point")); + assertAcked(prepareCreate("index").addMapping("type", LOCATION_FIELD, "type=geo_point")); indexRandom(true, - client().prepareIndex("index", "type", "d1").setSource(jsonBuilder().startObject().startObject("location").field("lat", 1).field("lon", 1).endObject().endObject()), - client().prepareIndex("index", "type", "d2").setSource(jsonBuilder().startObject().startObject("location").field("lat", 1).field("lon", 2).endObject().endObject())); + client().prepareIndex("index", "type", "d1").setSource(jsonBuilder().startObject().startObject(LOCATION_FIELD).field("lat", 1).field("lon", 1).endObject().endObject()), + client().prepareIndex("index", "type", "d2").setSource(jsonBuilder().startObject().startObject(LOCATION_FIELD).field("lat", 1).field("lon", 2).endObject().endObject())); ensureYellow(); String hashPoint = "s037ms06g7h0"; - GeoDistanceSortBuilder geoDistanceSortBuilder = new GeoDistanceSortBuilder("location", hashPoint); + GeoDistanceSortBuilder geoDistanceSortBuilder = new GeoDistanceSortBuilder(LOCATION_FIELD, hashPoint); SearchResponse searchResponse = client().prepareSearch() .setQuery(matchAllQuery()) @@ -227,7 +272,7 @@ public class GeoDistanceSortBuilderIT extends ESIntegTestCase { .execute().actionGet(); checkCorrectSortOrderForGeoSort(searchResponse); - geoDistanceSortBuilder = new GeoDistanceSortBuilder("location", new GeoPoint(2, 2)); + geoDistanceSortBuilder = new GeoDistanceSortBuilder(LOCATION_FIELD, new GeoPoint(2, 2)); searchResponse = client().prepareSearch() .setQuery(matchAllQuery()) @@ -235,7 +280,7 @@ public class GeoDistanceSortBuilderIT extends ESIntegTestCase { .execute().actionGet(); checkCorrectSortOrderForGeoSort(searchResponse); - geoDistanceSortBuilder = new GeoDistanceSortBuilder("location", 2, 2); + geoDistanceSortBuilder = new GeoDistanceSortBuilder(LOCATION_FIELD, 2, 2); searchResponse = client().prepareSearch() .setQuery(matchAllQuery()) @@ -246,28 +291,28 @@ public class GeoDistanceSortBuilderIT extends ESIntegTestCase { searchResponse = client() .prepareSearch() .setSource( - new SearchSourceBuilder().sort(SortBuilders.geoDistanceSort("location", 2.0, 2.0) + new SearchSourceBuilder().sort(SortBuilders.geoDistanceSort(LOCATION_FIELD, 2.0, 2.0) .unit(DistanceUnit.KILOMETERS).geoDistance(GeoDistance.PLANE))).execute().actionGet(); checkCorrectSortOrderForGeoSort(searchResponse); searchResponse = client() .prepareSearch() .setSource( - new SearchSourceBuilder().sort(SortBuilders.geoDistanceSort("location", "s037ms06g7h0") + new SearchSourceBuilder().sort(SortBuilders.geoDistanceSort(LOCATION_FIELD, "s037ms06g7h0") .unit(DistanceUnit.KILOMETERS).geoDistance(GeoDistance.PLANE))).execute().actionGet(); 
checkCorrectSortOrderForGeoSort(searchResponse); searchResponse = client() .prepareSearch() .setSource( - new SearchSourceBuilder().sort(SortBuilders.geoDistanceSort("location", 2.0, 2.0) + new SearchSourceBuilder().sort(SortBuilders.geoDistanceSort(LOCATION_FIELD, 2.0, 2.0) .unit(DistanceUnit.KILOMETERS).geoDistance(GeoDistance.PLANE))).execute().actionGet(); checkCorrectSortOrderForGeoSort(searchResponse); searchResponse = client() .prepareSearch() .setSource( - new SearchSourceBuilder().sort(SortBuilders.geoDistanceSort("location", 2.0, 2.0) + new SearchSourceBuilder().sort(SortBuilders.geoDistanceSort(LOCATION_FIELD, 2.0, 2.0) .unit(DistanceUnit.KILOMETERS).geoDistance(GeoDistance.PLANE) .ignoreMalformed(true).coerce(true))).execute().actionGet(); checkCorrectSortOrderForGeoSort(searchResponse); diff --git a/core/src/test/java/org/elasticsearch/search/sort/GeoDistanceSortBuilderTests.java b/core/src/test/java/org/elasticsearch/search/sort/GeoDistanceSortBuilderTests.java index 50e4aeeb71b..1fc0a8cacde 100644 --- a/core/src/test/java/org/elasticsearch/search/sort/GeoDistanceSortBuilderTests.java +++ b/core/src/test/java/org/elasticsearch/search/sort/GeoDistanceSortBuilderTests.java @@ -26,6 +26,8 @@ import org.elasticsearch.common.geo.GeoPoint; import org.elasticsearch.common.unit.DistanceUnit; import org.elasticsearch.common.xcontent.XContentHelper; import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.index.mapper.MappedFieldType; +import org.elasticsearch.index.mapper.geo.GeoPointFieldMapper; import org.elasticsearch.index.query.QueryParseContext; import org.elasticsearch.test.geo.RandomGeoGenerator; @@ -89,6 +91,13 @@ public class GeoDistanceSortBuilderTests extends AbstractSortTestCase diff --git a/core/src/test/java/org/elasticsearch/search/sort/RandomSortDataGenerator.java b/core/src/test/java/org/elasticsearch/search/sort/RandomSortDataGenerator.java + public static SortMode mode(SortMode original) { + Set<SortMode> set = new HashSet<>(); + set.add(original); + return mode(set); + } + + public static SortMode mode(Set<SortMode> except) { SortMode mode = ESTestCase.randomFrom(SortMode.values()); - while (mode.equals(original)) { + while (except.contains(mode)) { mode = ESTestCase.randomFrom(SortMode.values()); } return mode; diff --git a/core/src/test/java/org/elasticsearch/search/sort/ScriptSortBuilderTests.java b/core/src/test/java/org/elasticsearch/search/sort/ScriptSortBuilderTests.java index 091a6c3002a..b28285e096c 100644 --- a/core/src/test/java/org/elasticsearch/search/sort/ScriptSortBuilderTests.java +++ b/core/src/test/java/org/elasticsearch/search/sort/ScriptSortBuilderTests.java @@ -33,18 +33,29 @@ import org.junit.Rule; import org.junit.rules.ExpectedException; import java.io.IOException; +import java.util.HashSet; +import java.util.Set; public class ScriptSortBuilderTests extends AbstractSortTestCase<ScriptSortBuilder> { @Override protected ScriptSortBuilder createTestItem() { + ScriptSortType type = randomBoolean() ? ScriptSortType.NUMBER : ScriptSortType.STRING; ScriptSortBuilder builder = new ScriptSortBuilder(new Script(randomAsciiOfLengthBetween(5, 10)), - randomBoolean() ? 
ScriptSortType.NUMBER : ScriptSortType.STRING; + type); if (randomBoolean()) { - builder.order(RandomSortDataGenerator.order(builder.order())); + builder.order(RandomSortDataGenerator.order(builder.order())); } if (randomBoolean()) { - builder.sortMode(RandomSortDataGenerator.mode(builder.sortMode())); + if (type == ScriptSortType.NUMBER) { + builder.sortMode(RandomSortDataGenerator.mode(builder.sortMode())); + } else { + Set<SortMode> exceptThis = new HashSet<>(); + exceptThis.add(SortMode.SUM); + exceptThis.add(SortMode.AVG); + exceptThis.add(SortMode.MEDIAN); + builder.sortMode(RandomSortDataGenerator.mode(exceptThis)); + } } if (randomBoolean()) { builder.setNestedFilter(RandomSortDataGenerator.nestedFilter(builder.getNestedFilter())); @@ -68,7 +79,7 @@ public class ScriptSortBuilderTests extends AbstractSortTestCase diff --git a/docs/reference/aggregations/bucket/nested-aggregation.asciidoc b/docs/reference/aggregations/bucket/nested-aggregation.asciidoc "type" : "nested", "properties" : { - "name" : { "type" : "string" }, + "name" : { "type" : "text" }, "price" : { "type" : "double" } } } diff --git a/docs/reference/aggregations/bucket/reverse-nested-aggregation.asciidoc b/docs/reference/aggregations/bucket/reverse-nested-aggregation.asciidoc index 03bcdb0a18c..9dba1f2adf0 100644 --- a/docs/reference/aggregations/bucket/reverse-nested-aggregation.asciidoc +++ b/docs/reference/aggregations/bucket/reverse-nested-aggregation.asciidoc @@ -22,12 +22,12 @@ the issue documents as nested documents. The mapping could look like: "issue" : { "properties" : { - "tags" : { "type" : "string" } + "tags" : { "type" : "text" } "comments" : { <1> "type" : "nested" "properties" : { - "username" : { "type" : "string", "index" : "not_analyzed" }, - "comment" : { "type" : "string" } + "username" : { "type" : "keyword" }, + "comment" : { "type" : "text" } } } } diff --git a/docs/reference/analysis/analyzers/keyword-analyzer.asciidoc b/docs/reference/analysis/analyzers/keyword-analyzer.asciidoc index 7704895c9da..815037596cf 100644 --- a/docs/reference/analysis/analyzers/keyword-analyzer.asciidoc +++ b/docs/reference/analysis/analyzers/keyword-analyzer.asciidoc @@ -4,4 +4,4 @@ An analyzer of type `keyword` that "tokenizes" an entire stream as a single token. This is useful for data like zip codes, ids and so on. Note, when using mapping definitions, it might make more sense to simply -mark the field as `not_analyzed`. +map the field as a <<keyword,`keyword`>>. 
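For example, a minimal sketch of the `keyword` mapping the note suggests in place of a `not_analyzed` string (the index, type, and field names here are only illustrative): -------------------------------------------------- PUT zips { "mappings": { "zip": { "properties": { "zip_code": { "type": "keyword" } } } } } --------------------------------------------------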
diff --git a/docs/reference/cat/nodes.asciidoc b/docs/reference/cat/nodes.asciidoc index 90ff91513aa..6d4750da5a6 100644 --- a/docs/reference/cat/nodes.asciidoc +++ b/docs/reference/cat/nodes.asciidoc @@ -114,10 +114,18 @@ node (c) |d cache memory |0b |`fielddata.evictions` |`fe`, `fielddataEvictions` |No |Fielddata cache evictions |0 -|`filter_cache.memory_size` |`fcm`, `filterCacheMemory` |No |Used filter +|`query_cache.memory_size` |`qcm`, `queryCacheMemory` |No |Used query cache memory |0b -|`filter_cache.evictions` |`fce`, `filterCacheEvictions` |No |Filter +|`query_cache.evictions` |`qce`, `queryCacheEvictions` |No |Query cache evictions |0 +|`request_cache.memory_size` |`rcm`, `requestCacheMemory` |No | Used request +cache memory |0b +|`request_cache.evictions` |`rce`, `requestCacheEvictions` |No |Request +cache evictions |0 +|`request_cache.hit_count` |`rchc`, `requestCacheHitCount` |No | Request +cache hit count |0 +|`request_cache.miss_count` |`rcmc`, `requestCacheMissCount` |No | Request +cache miss count |0 |`flush.total` |`ft`, `flushTotal` |No |Number of flushes |1 |`flush.total_time` |`ftt`, `flushTotalTime` |No |Time spent in flush |1 |`get.current` |`gc`, `getCurrent` |No |Number of current get diff --git a/docs/reference/docs/reindex.asciidoc b/docs/reference/docs/reindex.asciidoc index 8173503054f..5f4641ca187 100644 --- a/docs/reference/docs/reindex.asciidoc +++ b/docs/reference/docs/reindex.asciidoc @@ -299,7 +299,8 @@ POST /_reindex === URL Parameters In addition to the standard parameters like `pretty`, the Reindex API also -supports `refresh`, `wait_for_completion`, `consistency`, and `timeout`. +supports `refresh`, `wait_for_completion`, `consistency`, `timeout`, and +`requests_per_second`. Sending the `refresh` url parameter will cause all indexes to which the request wrote to be refreshed. This is different than the Index API's `refresh` @@ -317,8 +318,14 @@ request. `timeout` controls how long each write request waits for unavailable shards to become available. Both work exactly how they work in the {ref}/docs-bulk.html[Bulk API]. -`timeout` controls how long each batch waits for the target shard to become -available. It works exactly how it works in the {ref}/docs-bulk.html[Bulk API]. +`requests_per_second` can be set to any decimal number (1.4, 6, 1000, etc) and +throttles the number of requests per second that the reindex issues. The +throttling is done by waiting between bulk batches so that it can manipulate the +scroll timeout. The wait time is the difference between +`requests_in_the_batch / requests_per_second` and the time the batch took to +complete. Since the batch isn't broken into multiple bulk requests, large batch +sizes will cause Elasticsearch to create many requests and then wait for a while +before starting the next set. This is "bursty" instead of "smooth". [float] === Response body @@ -333,6 +340,8 @@ The JSON response looks like this: "created": 123, "batches": 1, "version_conflicts": 2, + "retries": 0, + "throttled_millis": 0, "failures" : [ ] } -------------------------------------------------- @@ -357,6 +366,14 @@ The number of scroll responses pulled back by the reindex. The number of version conflicts that reindex hit. +`retries`:: + +The number of retries that the reindex did in response to a full queue. + +`throttled_millis`:: + +Number of milliseconds the request slept to conform to `requests_per_second`. + `failures`:: Array of all indexing failures. 
If this is non-empty then the request aborted @@ -403,7 +420,9 @@ The response looks like: "deleted" : 0, "batches" : 36, "version_conflicts" : 0, - "noops" : 0 + "noops" : 0, + "retries": 0, + "throttled_millis": 0 }, "description" : "" } ] diff --git a/docs/reference/docs/termvectors.asciidoc b/docs/reference/docs/termvectors.asciidoc index 0e108430f85..34fcaaecda1 100644 --- a/docs/reference/docs/termvectors.asciidoc +++ b/docs/reference/docs/termvectors.asciidoc @@ -136,13 +136,13 @@ curl -s -XPUT 'http://localhost:9200/twitter/' -d '{ "tweet": { "properties": { "text": { - "type": "string", + "type": "text", "term_vector": "with_positions_offsets_payloads", "store" : true, "analyzer" : "fulltext_analyzer" }, "fullname": { - "type": "string", + "type": "text", "term_vector": "with_positions_offsets_payloads", "analyzer" : "fulltext_analyzer" } diff --git a/docs/reference/docs/update-by-query.asciidoc b/docs/reference/docs/update-by-query.asciidoc index 13b5f6fc0eb..52667bf79a8 100644 --- a/docs/reference/docs/update-by-query.asciidoc +++ b/docs/reference/docs/update-by-query.asciidoc @@ -169,8 +169,14 @@ request. `timeout` controls how long each write request waits for unavailable shards to become available. Both work exactly how they work in the {ref}/docs-bulk.html[Bulk API]. -`timeout` controls how long each batch waits for the target shard to become -available. It works exactly how it works in the {ref}/docs-bulk.html[Bulk API]. +`requests_per_second` can be set to any decimal number (1.4, 6, 1000, etc.) and +throttles the number of requests per second that the update by query issues. The +throttling is done by waiting between bulk batches so that the update by query +can manipulate the scroll timeout. The wait time is the difference between +`requests_in_the_batch / requests_per_second` and the time the batch took to +complete. Since the batch isn't broken into multiple bulk requests, large batch +sizes will cause Elasticsearch to create many requests and then wait for a while +before starting the next set. This is "bursty" instead of "smooth". [float] === Response body @@ -184,6 +190,8 @@ The JSON response looks like this: "updated": 0, "batches": 1, "version_conflicts": 2, + "retries": 0, + "throttled_millis": 0, "failures" : [ ] } -------------------------------------------------- @@ -204,6 +212,14 @@ The number of scroll responses pulled back by the update by query. The number of version conflicts that the update by query hit. +`retries`:: + +The number of retries that the update by query did in response to a full queue. + +`throttled_millis`:: + +Number of milliseconds the request slept to conform to `requests_per_second`. + `failures`:: Array of all indexing failures.
If this is non-empty then the request aborted @@ -251,7 +267,9 @@ The response looks like: "deleted" : 0, "batches" : 36, "version_conflicts" : 0, - "noops" : 0 + "noops" : 0, + "retries": 0, + "throttled_millis": 0 }, "description" : "" } ] @@ -281,7 +299,7 @@ PUT test "test": { "dynamic": false, <1> "properties": { - "text": {"type": "string"} + "text": {"type": "text"} } } } @@ -300,8 +318,8 @@ POST test/test?refresh PUT test/_mapping/test <2> { "properties": { - "text": {"type": "string"}, - "flag": {"type": "string", "analyzer": "keyword"} + "text": {"type": "text"}, + "flag": {"type": "text", "analyzer": "keyword"} } } -------------------------------------------------- diff --git a/docs/reference/index-modules/similarity.asciidoc b/docs/reference/index-modules/similarity.asciidoc index 3d993a3a6eb..07591dc277b 100644 --- a/docs/reference/index-modules/similarity.asciidoc +++ b/docs/reference/index-modules/similarity.asciidoc @@ -39,7 +39,7 @@ Here we configure the DFRSimilarity so it can be referenced as { "book" : { "properties" : { - "title" : { "type" : "string", "similarity" : "my_similarity" } + "title" : { "type" : "text", "similarity" : "my_similarity" } } } -------------------------------------------------- diff --git a/docs/reference/indices/aliases.asciidoc b/docs/reference/indices/aliases.asciidoc index cb8f652070b..ab4e49e7aab 100644 --- a/docs/reference/indices/aliases.asciidoc +++ b/docs/reference/indices/aliases.asciidoc @@ -116,8 +116,7 @@ curl -XPUT 'http://localhost:9200/test1' -d '{ "type1": { "properties": { "user" : { - "type": "string", - "index": "not_analyzed" + "type": "keyword" } } } diff --git a/docs/reference/indices/create-index.asciidoc b/docs/reference/indices/create-index.asciidoc index 2210870135b..11216fa4c01 100644 --- a/docs/reference/indices/create-index.asciidoc +++ b/docs/reference/indices/create-index.asciidoc @@ -78,7 +78,7 @@ curl -XPOST localhost:9200/test -d '{ "mappings" : { "type1" : { "properties" : { - "field1" : { "type" : "string", "index" : "not_analyzed" } + "field1" : { "type" : "text" } } } } diff --git a/docs/reference/indices/get-field-mapping.asciidoc b/docs/reference/indices/get-field-mapping.asciidoc index 2aeb853e9f4..39667dc0874 100644 --- a/docs/reference/indices/get-field-mapping.asciidoc +++ b/docs/reference/indices/get-field-mapping.asciidoc @@ -22,7 +22,7 @@ For which the response is (assuming `text` is a default string field): "text": { "full_name": "text", "mapping": { - "text": { "type": "string" } + "text": { "type": "text" } } } } @@ -73,13 +73,13 @@ For example, consider the following mapping: { "article": { "properties": { - "id": { "type": "string" }, - "title": { "type": "string"}, - "abstract": { "type": "string"}, + "id": { "type": "text" }, + "title": { "type": "text"}, + "abstract": { "type": "text"}, "author": { "properties": { - "id": { "type": "string" }, - "name": { "type": "string" } + "id": { "type": "text" }, + "name": { "type": "text" } } } } @@ -105,19 +105,19 @@ returns: "abstract": { "full_name": "abstract", "mapping": { - "abstract": { "type": "string" } + "abstract": { "type": "text" } } }, "author.id": { "full_name": "author.id", "mapping": { - "id": { "type": "string" } + "id": { "type": "text" } } }, "name": { "full_name": "author.name", "mapping": { - "name": { "type": "string" } + "name": { "type": "text" } } } } diff --git a/docs/reference/indices/put-mapping.asciidoc b/docs/reference/indices/put-mapping.asciidoc index 7dd2389e824..cc94a08f626 100644 ---
a/docs/reference/indices/put-mapping.asciidoc +++ b/docs/reference/indices/put-mapping.asciidoc @@ -12,7 +12,7 @@ PUT twitter <1> "tweet": { "properties": { "message": { - "type": "string" + "type": "text" } } } @@ -23,7 +23,7 @@ PUT twitter/_mapping/user <2> { "properties": { "name": { - "type": "string" + "type": "text" } } } @@ -32,7 +32,7 @@ PUT twitter/_mapping/tweet <3> { "properties": { "user_name": { - "type": "string" + "type": "text" } } } @@ -86,13 +86,12 @@ PUT my_index <1> "name": { "properties": { "first": { - "type": "string" + "type": "text" } } }, "user_id": { - "type": "string", - "index": "not_analyzed" + "type": "keyword" } } } @@ -105,13 +104,12 @@ PUT my_index/_mapping/user "name": { "properties": { "last": { <2> - "type": "string" + "type": "text" } } }, "user_id": { - "type": "string", - "index": "not_analyzed", + "type": "keyword", "ignore_above": 100 <3> } } @@ -149,7 +147,7 @@ PUT my_index "type_one": { "properties": { "text": { <1> - "type": "string", + "type": "text", "analyzer": "standard" } } @@ -157,7 +155,7 @@ PUT my_index "type_two": { "properties": { "text": { <1> - "type": "string", + "type": "text", "analyzer": "standard" } } @@ -169,7 +167,7 @@ PUT my_index/_mapping/type_one <2> { "properties": { "text": { - "type": "string", + "type": "text", "analyzer": "standard", "search_analyzer": "whitespace" } @@ -180,7 +178,7 @@ PUT my_index/_mapping/type_one?update_all_types <3> { "properties": { "text": { - "type": "string", + "type": "text", "analyzer": "standard", "search_analyzer": "whitespace" } diff --git a/docs/reference/mapping.asciidoc b/docs/reference/mapping.asciidoc index 407f43625fa..8ead0436978 100644 --- a/docs/reference/mapping.asciidoc +++ b/docs/reference/mapping.asciidoc @@ -46,7 +46,7 @@ Fields with the same name in different mapping types in the same index Each field has a data `type` which can be: -* a simple type like <>, <>, <>, +* a simple type like <>, <>, <>, <>, <>, <> or <>. * a type which supports the hierarchical nature of JSON such as <> or <>. @@ -55,7 +55,7 @@ Each field has a data `type` which can be: It is often useful to index the same field in different ways for different purposes. For instance, a `string` field could be <> as -an `analyzed` field for full-text search, and as a `not_analyzed` field for +a `text` field for full-text search, and as a `keyword` field for sorting or aggregations. 
Alternatively, you could index a string field with the <>, the <> analyzer, and the @@ -134,18 +134,17 @@ PUT my_index <1> "user": { <2> "_all": { "enabled": false }, <3> "properties": { <4> - "title": { "type": "string" }, <5> - "name": { "type": "string" }, <5> + "title": { "type": "text" }, <5> + "name": { "type": "text" }, <5> "age": { "type": "integer" } <5> } }, "blogpost": { <2> "properties": { <4> - "title": { "type": "string" }, <5> - "body": { "type": "string" }, <5> + "title": { "type": "text" }, <5> + "body": { "type": "text" }, <5> "user_id": { - "type": "string", <5> - "index": "not_analyzed" + "type": "keyword" <5> }, "created": { "type": "date", <5> diff --git a/docs/reference/mapping/dynamic/default-mapping.asciidoc b/docs/reference/mapping/dynamic/default-mapping.asciidoc index c1e1f8dec66..bef90301f0c 100644 --- a/docs/reference/mapping/dynamic/default-mapping.asciidoc +++ b/docs/reference/mapping/dynamic/default-mapping.asciidoc @@ -56,11 +56,10 @@ PUT _template/logging "strings": { <4> "match_mapping_type": "string", "mapping": { - "type": "string", + "type": "text", "fields": { "raw": { - "type": "string", - "index": "not_analyzed", + "type": "keyword", "ignore_above": 256 } } @@ -79,4 +78,4 @@ PUT logs-2015.10.01/event/1 <1> The `logging` template will match any indices beginning with `logs-`. <2> Matching indices will be created with a single primary shard. <3> The `_all` field will be disabled by default for new type mappings. -<4> String fields will be created with an `analyzed` main field, and a `not_analyzed` `.raw` field. +<4> String fields will be created with a `text` main field, and a `keyword` `.raw` field. diff --git a/docs/reference/mapping/dynamic/field-mapping.asciidoc b/docs/reference/mapping/dynamic/field-mapping.asciidoc index 585931d5e3f..f8612958f9c 100644 --- a/docs/reference/mapping/dynamic/field-mapping.asciidoc +++ b/docs/reference/mapping/dynamic/field-mapping.asciidoc @@ -22,7 +22,7 @@ string:: Either a <> field (if the value passes <>), a <> or <> field (if the value passes <>) - or an <> <> field. + or an <> field. These are the only <> that are dynamically detected. All other datatypes must be mapped explicitly. @@ -81,7 +81,7 @@ PUT my_index/my_type/1 <1> -------------------------------------------------- // AUTOSENSE -<1> The `create_date` field has been added as a <> field. +<1> The `create_date` field has been added as a <> field. ===== Customising detected date formats diff --git a/docs/reference/mapping/dynamic/templates.asciidoc b/docs/reference/mapping/dynamic/templates.asciidoc index b903f1af066..1137f454ffd 100644 --- a/docs/reference/mapping/dynamic/templates.asciidoc +++ b/docs/reference/mapping/dynamic/templates.asciidoc @@ -52,7 +52,7 @@ can be automatically detected: `boolean`, `date`, `double`, `long`, `object`, `string`. It also accepts `*` to match all datatypes. For example, if we wanted to map all integer fields as `integer` instead of -`long`, and all `string` fields as both `analyzed` and `not_analyzed`, we +`long`, and all `string` fields as both `text` and `keyword`, we could use the following template: [source,js] @@ -74,11 +74,10 @@ PUT my_index "strings": { "match_mapping_type": "string", "mapping": { - "type": "string", + "type": "text", "fields": { "raw": { - "type": "string", - "index": "not_analyzed", + "type": "keyword", "ignore_above": 256 } } @@ -99,7 +98,7 @@ PUT my_index/my_type/1 -------------------------------------------------- // AUTOSENSE <1> The `my_integer` field is mapped as an `integer`. 
-<2> The `my_string` field is mapped as an analyzed `string`, with a `not_analyzed` <>. +<2> The `my_string` field is mapped as a `text` field, with a `keyword` <>. [[match-unmatch]] @@ -180,7 +179,7 @@ PUT my_index "path_match": "name.*", "path_unmatch": "*.middle", "mapping": { - "type": "string", + "type": "text", "copy_to": "full_name" } } @@ -221,7 +220,7 @@ PUT my_index "match_mapping_type": "string", "match": "*", "mapping": { - "type": "string", + "type": "text", "analyzer": "{name}" } } diff --git a/docs/reference/mapping/fields/all-field.asciidoc b/docs/reference/mapping/fields/all-field.asciidoc index ae52fc1d0d1..6c5f073aee8 100644 --- a/docs/reference/mapping/fields/all-field.asciidoc +++ b/docs/reference/mapping/fields/all-field.asciidoc @@ -45,7 +45,7 @@ from each field as a string. It does not combine the _terms_ from each field. ============================================================================= -The `_all` field is just a <> field, and accepts the same +The `_all` field is just a <> field, and accepts the same parameters that other string fields accept, including `analyzer`, `term_vectors`, `index_options`, and `store`. @@ -136,7 +136,7 @@ PUT my_index }, "properties": { "content": { - "type": "string" + "type": "text" } } } @@ -172,11 +172,11 @@ PUT myindex "mytype": { "properties": { "title": { <1> - "type": "string", + "type": "text", "boost": 2 }, "content": { <1> - "type": "string" + "type": "text" } } } @@ -210,15 +210,15 @@ PUT myindex "mytype": { "properties": { "first_name": { - "type": "string", + "type": "text", "copy_to": "full_name" <1> }, "last_name": { - "type": "string", + "type": "text", "copy_to": "full_name" <1> }, "full_name": { - "type": "string" + "type": "text" } } } diff --git a/docs/reference/mapping/fields/parent-field.asciidoc b/docs/reference/mapping/fields/parent-field.asciidoc index 64f4a9934a6..fb066580044 100644 --- a/docs/reference/mapping/fields/parent-field.asciidoc +++ b/docs/reference/mapping/fields/parent-field.asciidoc @@ -127,7 +127,7 @@ global ordinals for the `_parent` field. Global ordinals, by default, are built lazily: the first parent-child query or aggregation after a refresh will trigger building of global ordinals. This can introduce a significant latency spike for your users.
You can use -<> to shift the cost of building global +<> to shift the cost of building global ordinals from query time to refresh time, by mapping the `_parent` field as follows: [source,js] @@ -139,9 +139,7 @@ PUT my_index "my_child": { "_parent": { "type": "my_parent", - "fielddata": { - "loading": "eager_global_ordinals" - } + "eager_global_ordinals": true } } } diff --git a/docs/reference/mapping/params/analyzer.asciidoc b/docs/reference/mapping/params/analyzer.asciidoc index 68009e600db..2a452465e2d 100644 --- a/docs/reference/mapping/params/analyzer.asciidoc +++ b/docs/reference/mapping/params/analyzer.asciidoc @@ -47,10 +47,10 @@ PUT my_index "my_type": { "properties": { "text": { <1> - "type": "string", + "type": "text", "fields": { "english": { <2> - "type": "string", + "type": "text", "analyzer": "english" } } @@ -124,7 +124,7 @@ PUT /my_index "my_type":{ "properties":{ "title": { - "type":"string", + "type":"text", "analyzer":"my_analyzer", <3> "search_analyzer":"my_stop_analyzer", <4> "search_quote_analyzer":"my_analyzer" <5> diff --git a/docs/reference/mapping/params/boost.asciidoc b/docs/reference/mapping/params/boost.asciidoc index 22c0e2e69ea..add6f806844 100644 --- a/docs/reference/mapping/params/boost.asciidoc +++ b/docs/reference/mapping/params/boost.asciidoc @@ -12,11 +12,11 @@ PUT my_index "my_type": { "properties": { "title": { - "type": "string", + "type": "text", "boost": 2 <1> }, "content": { - "type": "string" + "type": "text" } } } @@ -83,4 +83,4 @@ We advise against using index time boosting for the following reasons: byte. This reduces the resolution of the field length normalization factor which can lead to lower quality relevance calculations. -================================================== \ No newline at end of file +================================================== diff --git a/docs/reference/mapping/params/copy-to.asciidoc b/docs/reference/mapping/params/copy-to.asciidoc index b437a87424a..863bf1996cd 100644 --- a/docs/reference/mapping/params/copy-to.asciidoc +++ b/docs/reference/mapping/params/copy-to.asciidoc @@ -15,15 +15,15 @@ PUT /my_index "my_type": { "properties": { "first_name": { - "type": "string", + "type": "text", "copy_to": "full_name" <1> }, "last_name": { - "type": "string", + "type": "text", "copy_to": "full_name" <1> }, "full_name": { - "type": "string" + "type": "text" } } } diff --git a/docs/reference/mapping/params/doc-values.asciidoc b/docs/reference/mapping/params/doc-values.asciidoc index 81f9b6e3c64..4ded2212de1 100644 --- a/docs/reference/mapping/params/doc-values.asciidoc +++ b/docs/reference/mapping/params/doc-values.asciidoc @@ -29,12 +29,10 @@ PUT my_index "my_type": { "properties": { "status_code": { <1> - "type": "string", - "index": "not_analyzed" + "type": "keyword" }, "session_id": { <2> - "type": "string", - "index": "not_analyzed", + "type": "keyword", "doc_values": false } } diff --git a/docs/reference/mapping/params/dynamic.asciidoc b/docs/reference/mapping/params/dynamic.asciidoc index db73709f4f3..72bbd369d7f 100644 --- a/docs/reference/mapping/params/dynamic.asciidoc +++ b/docs/reference/mapping/params/dynamic.asciidoc @@ -67,7 +67,7 @@ PUT my_index "user": { <2> "properties": { "name": { - "type": "string" + "type": "text" }, "social_networks": { <3> "dynamic": true, diff --git a/docs/reference/mapping/params/enabled.asciidoc b/docs/reference/mapping/params/enabled.asciidoc index 6f72f4da890..7bffcfddf2e 100644 --- a/docs/reference/mapping/params/enabled.asciidoc +++ 
b/docs/reference/mapping/params/enabled.asciidoc @@ -21,8 +21,7 @@ PUT my_index "session": { "properties": { "user_id": { - "type": "string", - "index": "not_analyzed" + "type": "keyword" }, "last_updated": { "type": "date" diff --git a/docs/reference/mapping/params/fielddata.asciidoc b/docs/reference/mapping/params/fielddata.asciidoc index 4d96fb61132..e67b47a831f 100644 --- a/docs/reference/mapping/params/fielddata.asciidoc +++ b/docs/reference/mapping/params/fielddata.asciidoc @@ -12,28 +12,28 @@ documents, we need to be able to look up the document and find the terms that it has in a field. Most fields can use index-time, on-disk <> to support -this type of data access pattern, but `analyzed` string fields do not support -`doc_values`. +this type of data access pattern, but `text` fields do not support `doc_values`. -Instead, `analyzed` strings use a query-time data structure called +Instead, `text` strings use a query-time data structure called `fielddata`. This data structure is built on demand the first time that a field is used for aggregations, sorting, or is accessed in a script. It is built by reading the entire inverted index for each segment from disk, inverting the term ↔︎ document relationship, and storing the result in memory, in the JVM heap. -Loading fielddata is an expensive process so, once it has been loaded, it -remains in memory for the lifetime of the segment. +Loading fielddata is an expensive process so it is disabled by default. Also, +when enabled, once it has been loaded, it remains in memory for the lifetime of +the segment. [WARNING] .Fielddata can fill up your heap space ============================================================================== Fielddata can consume a lot of heap space, especially when loading high -cardinality `analyzed` string fields. Most of the time, it doesn't make sense -to sort or aggregate on `analyzed` string fields (with the notable exception +cardinality `text` fields. Most of the time, it doesn't make sense +to sort or aggregate on `text` fields (with the notable exception of the <> -aggregation). Always think about whether a `not_analyzed` field (which can +aggregation). Always think about whether a <> field (which can use `doc_values`) would be a better fit for your use case. ============================================================================== @@ -42,71 +42,6 @@ same name in the same index. Its value can be updated on existing fields using the <>. -[[fielddata-format]] -==== `fielddata.format` - -For `analyzed` string fields, the fielddata `format` controls whether -fielddata should be enabled or not. It accepts: `disabled` and `paged_bytes` -(enabled, which is the default). To disable fielddata loading, you can use -the following mapping: - -[source,js] --------------------------------------------------- -PUT my_index -{ - "mappings": { - "my_type": { - "properties": { - "text": { - "type": "string", - "fielddata": { - "format": "disabled" <1> - } - } - } - } - } -} --------------------------------------------------- -// AUTOSENSE -<1> The `text` field cannot be used for sorting, aggregations, or in scripts. - -.Fielddata and other datatypes -[NOTE] -================================================== - -Historically, other field datatypes also used fielddata, but this has been replaced -by index-time, disk-based <>. - -================================================== - - -[[fielddata-loading]] -==== `fielddata.loading` - -This per-field setting controls when fielddata is loaded into memory. 
It -accepts three options: - -[horizontal] -`lazy`:: - - Fielddata is only loaded into memory when it is needed. (default) - -`eager`:: - - Fielddata is loaded into memory before a new search segment becomes - visible to search. This can reduce the latency that a user may experience - if their search request has to trigger lazy loading from a big segment. - -`eager_global_ordinals`:: - - Loading fielddata into memory is only part of the work that is required. - After loading the fielddata for each segment, Elasticsearch builds the - <> data structure to make a list of all unique terms - across all the segments in a shard. By default, global ordinals are built - lazily. If the field has a very high cardinality, global ordinals may - take some time to build, in which case you can use eager loading instead. - [[global-ordinals]] .Global ordinals ***************************************** @@ -141,15 +76,10 @@ can move the loading time from the first search request, to the refresh itself. ***************************************** [[field-data-filtering]] -==== `fielddata.filter` +==== `fielddata_frequency_filter` Fielddata filtering can be used to reduce the number of terms loaded into -memory, and thus reduce memory usage. Terms can be filtered by _frequency_ or -by _regular expression_, or a combination of the two: - -Filtering by frequency:: -+ --- +memory, and thus reduce memory usage. Terms can be filtered by _frequency_: The frequency filter allows you to only load terms whose term frequency falls between a `min` and `max` value, which can be expressed as an absolute @@ -169,7 +99,7 @@ PUT my_index "my_type": { "properties": { "tag": { - "type": "string", + "type": "text", "fielddata": { "filter": { "frequency": { @@ -186,44 +116,3 @@ PUT my_index } -------------------------------------------------- // AUTOSENSE --- - -Filtering by regex:: -+ --- -Terms can also be filtered by regular expression - only values which -match the regular expression are loaded. Note: the regular expression is -applied to each term in the field, not to the whole field value. For -instance, to only load hashtags from a tweet, we can use a regular -expression which matches terms beginning with `#`: - -[source,js] --------------------------------------------------- -PUT my_index -{ - "mappings": { - "my_type": { - "properties": { - "tweet": { - "type": "string", - "analyzer": "whitespace", - "fielddata": { - "filter": { - "regex": { - "pattern": "^#.*" - } - } - } - } - } - } - } -} --------------------------------------------------- -// AUTOSENSE --- - -These filters can be updated on an existing field mapping and will take -effect the next time the fielddata for a segment is loaded. Use the -<> API -to reload the fielddata using the new filters. diff --git a/docs/reference/mapping/params/ignore-above.asciidoc b/docs/reference/mapping/params/ignore-above.asciidoc index 3a8e527860a..a9fa5377144 100644 --- a/docs/reference/mapping/params/ignore-above.asciidoc +++ b/docs/reference/mapping/params/ignore-above.asciidoc @@ -1,12 +1,7 @@ [[ignore-above]] === `ignore_above` -Strings longer than the `ignore_above` setting will not be processed by the -<> and will not be indexed. This is mainly useful for -<> string fields, which are typically used for -filtering, aggregations, and sorting. These are structured fields and it -doesn't usually make sense to allow very long terms to be indexed in these -fields. +Strings longer than the `ignore_above` setting will not be indexed or stored.
[source,js] -------------------------------------------------- @@ -16,8 +11,7 @@ PUT my_index "my_type": { "properties": { "message": { - "type": "string", - "index": "not_analyzed", + "type": "keyword", "ignore_above": 20 <1> } } diff --git a/docs/reference/mapping/params/include-in-all.asciidoc b/docs/reference/mapping/params/include-in-all.asciidoc index 5061c16d374..76a0d14527c 100644 --- a/docs/reference/mapping/params/include-in-all.asciidoc +++ b/docs/reference/mapping/params/include-in-all.asciidoc @@ -14,10 +14,10 @@ PUT my_index "my_type": { "properties": { "title": { <1> - "type": "string" + "type": "text" }, "content": { <1> - "type": "string" + "type": "text" }, "date": { <2> "type": "date", @@ -50,18 +50,18 @@ PUT my_index "my_type": { "include_in_all": false, <1> "properties": { - "title": { "type": "string" }, + "title": { "type": "text" }, "author": { "include_in_all": true, <2> "properties": { - "first_name": { "type": "string" }, - "last_name": { "type": "string" } + "first_name": { "type": "text" }, + "last_name": { "type": "text" } } }, "editor": { "properties": { - "first_name": { "type": "string" }, <3> - "last_name": { "type": "string", "include_in_all": true } <3> + "first_name": { "type": "text" }, <3> + "last_name": { "type": "text", "include_in_all": true } <3> } } } diff --git a/docs/reference/mapping/params/index-options.asciidoc b/docs/reference/mapping/params/index-options.asciidoc index f4608714258..9f327adb969 100644 --- a/docs/reference/mapping/params/index-options.asciidoc +++ b/docs/reference/mapping/params/index-options.asciidoc @@ -39,7 +39,7 @@ PUT my_index "my_type": { "properties": { "text": { - "type": "string", + "type": "text", "index_options": "offsets" } } diff --git a/docs/reference/mapping/params/index.asciidoc b/docs/reference/mapping/params/index.asciidoc index 6dd9151814d..e097293d142 100644 --- a/docs/reference/mapping/params/index.asciidoc +++ b/docs/reference/mapping/params/index.asciidoc @@ -1,48 +1,6 @@ [[mapping-index]] === `index` -The `index` option controls how field values are indexed and, thus, how they -are searchable. It accepts three values: +The `index` option controls whether field values are indexed. It accepts `true` +or `false`. Fields that are not indexed are not queryable. -[horizontal] -`no`:: - - Do not add this field value to the index. With this setting, the field - will not be queryable. - -`not_analyzed`:: - - Add the field value to the index unchanged, as a single term. This is the - default for all fields that support this option except for - <> fields. `not_analyzed` fields are usually used with - <> for structured search. - -`analyzed`:: - - This option applies only to `string` fields, for which it is the default. - The string field value is first <> to convert the - string into terms (e.g. a list of individual words), which are then - indexed. At search time, the query string is passed through - (<>) the same analyzer to generate terms - in the same format as those in the index. It is this process that enables - <>.
- -For example, you can create a `not_analyzed` string field with the following: - -[source,js] --------------------------------------------------- -PUT /my_index -{ - "mappings": { - "my_type": { - "properties": { - "status_code": { - "type": "string", - "index": "not_analyzed" - } - } - } - } -} --------------------------------------------------- -// AUTOSENSE \ No newline at end of file diff --git a/docs/reference/mapping/params/multi-fields.asciidoc b/docs/reference/mapping/params/multi-fields.asciidoc index 994d2fddbc1..8ca2809c7f2 100644 --- a/docs/reference/mapping/params/multi-fields.asciidoc +++ b/docs/reference/mapping/params/multi-fields.asciidoc @@ -3,8 +3,8 @@ It is often useful to index the same field in different ways for different purposes. This is the purpose of _multi-fields_. For instance, a `string` -field could be <> as an `analyzed` field for full-text -search, and as a `not_analyzed` field for sorting or aggregations: +field could be mapped as a `text` field for full-text +search, and as a `keyword` field for sorting or aggregations: [source,js] -------------------------------------------------- @@ -14,11 +14,10 @@ PUT /my_index "my_type": { "properties": { "city": { - "type": "string", + "type": "text", "fields": { "raw": { <1> - "type": "string", - "index": "not_analyzed" + "type": "keyword" } } } @@ -57,8 +56,8 @@ GET /my_index/_search } -------------------------------------------------- // AUTOSENSE -<1> The `city.raw` field is a `not_analyzed` version of the `city` field. -<2> The analyzed `city` field can be used for full text search. +<1> The `city.raw` field is a `keyword` version of the `city` field. +<2> The `city` field can be used for full text search. <3> The `city.raw` field can be used for sorting and aggregations NOTE: Multi-fields do not change the original `_source` field. @@ -83,10 +82,10 @@ PUT my_index "my_type": { "properties": { "text": { <1> - "type": "string", + "type": "text", "fields": { "english": { <2> - "type": "string", + "type": "text", "analyzer": "english" } } diff --git a/docs/reference/mapping/params/norms.asciidoc b/docs/reference/mapping/params/norms.asciidoc index f83f93caf56..f6e42219a1f 100644 --- a/docs/reference/mapping/params/norms.asciidoc +++ b/docs/reference/mapping/params/norms.asciidoc @@ -4,14 +4,14 @@ Norms store various normalization factors that are later used at query time in order to compute the score of a document relatively to a query. -Although useful for scoring, norms also require quite a lot of memory +Although useful for scoring, norms also require quite a lot of disk (typically in the order of one byte per document per field in your index, even for documents that don't have this specific field). As a consequence, if you don't need scoring on a specific field, you should disable norms on that field. In particular, this is the case for fields that are used solely for filtering or aggregations. -TIP: The `norms.enabled` setting must have the same setting for fields of the +TIP: The `norms` setting must have the same setting for fields of the same name in the same index. Norms can be disabled on existing fields using the <>. @@ -24,10 +24,8 @@ PUT my_index/_mapping/my_type { "properties": { "title": { - "type": "string", - "norms": { - "enabled": false - } + "type": "text", + "norms": false } } } @@ -41,31 +39,3 @@ results since some documents won't have norms anymore while other documents might still have norms. 
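To complement the mapping-update example above, here is a minimal sketch (hypothetical index and field names) of the same boolean `norms` format applied when the index is first created:

[source,js]
--------------------------------------------------
PUT my_index
{
  "mappings": {
    "my_type": {
      "properties": {
        "title": {
          "type": "text",
          "norms": false <1>
        }
      }
    }
  }
}
--------------------------------------------------
// AUTOSENSE
<1> The `title` field will not store norms, so field-length cannot contribute to scoring on it.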
-==== Lazy loading of norms - -Norms can be loaded into memory eagerly (`eager`), whenever a new segment -comes online, or they can loaded lazily (`lazy`, default), only when the field -is queried. - -Eager loading can be configured as follows: - -[source,js] ------------- -PUT my_index/_mapping/my_type -{ - "properties": { - "title": { - "type": "string", - "norms": { - "loading": "eager" - } - } - } -} ------------- -// AUTOSENSE - -TIP: The `norms.loading` setting must have the same setting for fields of the -same name in the same index. Its value can be updated on existing fields -using the <>. - diff --git a/docs/reference/mapping/params/null-value.asciidoc b/docs/reference/mapping/params/null-value.asciidoc index 4d70d4a6ac5..b77a2e4da69 100644 --- a/docs/reference/mapping/params/null-value.asciidoc +++ b/docs/reference/mapping/params/null-value.asciidoc @@ -16,8 +16,7 @@ PUT my_index "my_type": { "properties": { "status_code": { - "type": "string", - "index": "not_analyzed", + "type": "keyword", "null_value": "NULL" <1> } } @@ -50,6 +49,4 @@ GET my_index/_search <3> A query for `NULL` returns document 1, but not document 2. IMPORTANT: The `null_value` needs to be the same datatype as the field. For -instance, a `long` field cannot have a string `null_value`. String fields -which are `analyzed` will also pass the `null_value` through the configured -analyzer. +instance, a `long` field cannot have a string `null_value`. diff --git a/docs/reference/mapping/params/position-increment-gap.asciidoc b/docs/reference/mapping/params/position-increment-gap.asciidoc index 962e2178469..d2cf1360080 100644 --- a/docs/reference/mapping/params/position-increment-gap.asciidoc +++ b/docs/reference/mapping/params/position-increment-gap.asciidoc @@ -57,7 +57,7 @@ PUT my_index "groups": { "properties": { "names": { - "type": "string", + "type": "text", "position_increment_gap": 0 <1> } } diff --git a/docs/reference/mapping/params/properties.asciidoc b/docs/reference/mapping/params/properties.asciidoc index 7683272ea19..a4f5277649a 100644 --- a/docs/reference/mapping/params/properties.asciidoc +++ b/docs/reference/mapping/params/properties.asciidoc @@ -23,14 +23,14 @@ PUT my_index "manager": { <2> "properties": { "age": { "type": "integer" }, - "name": { "type": "string" } + "name": { "type": "text" } } }, "employees": { <3> "type": "nested", "properties": { "age": { "type": "integer" }, - "name": { "type": "string" } + "name": { "type": "text" } } } } diff --git a/docs/reference/mapping/params/search-analyzer.asciidoc b/docs/reference/mapping/params/search-analyzer.asciidoc index b1aa7b6c688..5a732ee3aac 100644 --- a/docs/reference/mapping/params/search-analyzer.asciidoc +++ b/docs/reference/mapping/params/search-analyzer.asciidoc @@ -41,7 +41,7 @@ PUT /my_index "my_type": { "properties": { "text": { - "type": "string", + "type": "text", "analyzer": "autocomplete", <2> "search_analyzer": "standard" <2> } diff --git a/docs/reference/mapping/params/similarity.asciidoc b/docs/reference/mapping/params/similarity.asciidoc index 0cdbd80a93e..731c557e730 100644 --- a/docs/reference/mapping/params/similarity.asciidoc +++ b/docs/reference/mapping/params/similarity.asciidoc @@ -5,8 +5,8 @@ Elasticsearch allows you to configure a scoring algorithm or _similarity_ per field. The `similarity` setting provides a simple way of choosing a similarity algorithm other than the default TF/IDF, such as `BM25`. 
-Similarities are mostly useful for <> fields, especially -`analyzed` string fields, but can also apply to other field types. +Similarities are mostly useful for <> fields, but can also apply +to other field types. Custom similarities can be configured by tuning the parameters of the built-in similarities. For more details about these expert options, see the @@ -37,10 +37,10 @@ PUT my_index "my_type": { "properties": { "default_field": { <1> - "type": "string" + "type": "text" }, "bm25_field": { - "type": "string", + "type": "text", "similarity": "BM25" <2> } } diff --git a/docs/reference/mapping/params/store.asciidoc b/docs/reference/mapping/params/store.asciidoc index 46d57e9d8b5..9f10b25724c 100644 --- a/docs/reference/mapping/params/store.asciidoc +++ b/docs/reference/mapping/params/store.asciidoc @@ -24,7 +24,7 @@ PUT /my_index "my_type": { "properties": { "title": { - "type": "string", + "type": "text", "store": true <1> }, "date": { @@ -32,7 +32,7 @@ PUT /my_index "store": true <1> }, "content": { - "type": "string" + "type": "text" } } } diff --git a/docs/reference/mapping/params/term-vector.asciidoc b/docs/reference/mapping/params/term-vector.asciidoc index 74c4c416d95..136ab084149 100644 --- a/docs/reference/mapping/params/term-vector.asciidoc +++ b/docs/reference/mapping/params/term-vector.asciidoc @@ -35,7 +35,7 @@ PUT my_index "my_type": { "properties": { "text": { - "type": "string", + "type": "text", "term_vector": "with_positions_offsets" } } diff --git a/docs/reference/mapping/types.asciidoc b/docs/reference/mapping/types.asciidoc index 2ac579f273a..30d6bd56b1f 100644 --- a/docs/reference/mapping/types.asciidoc +++ b/docs/reference/mapping/types.asciidoc @@ -7,7 +7,7 @@ document: [float] === Core datatypes -<>:: `string` +string:: <> and <> <>:: `long`, `integer`, `short`, `byte`, `double`, `float` <>:: `date` <>:: `boolean` @@ -45,9 +45,9 @@ Attachment datatype:: === Multi-fields It is often useful to index the same field in different ways for different -purposes. For instance, a `string` field could be <> as -an `analyzed` field for full-text search, and as a `not_analyzed` field for -sorting or aggregations. Alternatively, you could index a string field with +purposes. For instance, a `string` field could be mapped as +a `text` field for full-text search, and as a `keyword` field for +sorting or aggregations. Alternatively, you could index a text field with the <>, the <> analyzer, and the <>.
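As a usage sketch of the `text`-plus-`keyword` pattern just described (hypothetical field names, in the spirit of the example on the multi-fields page), a single request can query the analyzed field while sorting and aggregating on its `keyword` sub-field:

[source,js]
--------------------------------------------------
GET my_index/_search
{
  "query": {
    "match": {
      "city": "new york" <1>
    }
  },
  "sort": {
    "city.raw": "asc" <2>
  },
  "aggs": {
    "cities": {
      "terms": {
        "field": "city.raw" <2>
      }
    }
  }
}
--------------------------------------------------
// AUTOSENSE
<1> The analyzed `city` field is used for full-text matching.
<2> The `city.raw` `keyword` sub-field is used for sorting and for the terms aggregation.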
@@ -69,6 +69,8 @@ include::types/geo-shape.asciidoc[] include::types/ip.asciidoc[] +include::types/keyword.asciidoc[] + include::types/nested.asciidoc[] include::types/numeric.asciidoc[] @@ -77,6 +79,8 @@ include::types/object.asciidoc[] include::types/string.asciidoc[] +include::types/text.asciidoc[] + include::types/token-count.asciidoc[] diff --git a/docs/reference/mapping/types/binary.asciidoc b/docs/reference/mapping/types/binary.asciidoc index 4e5f6b4bc27..7f82523416f 100644 --- a/docs/reference/mapping/types/binary.asciidoc +++ b/docs/reference/mapping/types/binary.asciidoc @@ -13,7 +13,7 @@ PUT my_index "my_type": { "properties": { "name": { - "type": "string" + "type": "text" }, "blob": { "type": "binary" diff --git a/docs/reference/mapping/types/keyword.asciidoc b/docs/reference/mapping/types/keyword.asciidoc new file mode 100644 index 00000000000..66c7135c37d --- /dev/null +++ b/docs/reference/mapping/types/keyword.asciidoc @@ -0,0 +1,111 @@ +[[keyword]] +=== Keyword datatype + +A field to index structured content such as email addresses, hostnames, status +codes, zip codes, or tags. + +They are typically used for filtering (_Find me all blog posts where +++status++ is ++published++_), for sorting, and for aggregations. Keyword +fields are only searchable by their exact value. + +If you need to index full text content such as email bodies or product +descriptions, you should probably use a <> field instead. + +Below is an example of a mapping for a keyword field: + +[source,js] +-------------------------------- +PUT my_index +{ + "mappings": { + "my_type": { + "properties": { + "tags": { + "type": "keyword" + } + } + } + } +} +-------------------------------- +// AUTOSENSE + +[[keyword-params]] +==== Parameters for keyword fields + +The following parameters are accepted by `keyword` fields: + +[horizontal] + +<>:: + + Mapping field-level query time boosting. Accepts a floating point number, defaults + to `1.0`. + +<>:: + + Should the field be stored on disk in a column-stride fashion, so that it + can later be used for sorting, aggregations, or scripting? Accepts `true` + (default) or `false`. + +<>:: + + Should global ordinals be loaded eagerly on refresh? Accepts `true` or `false` + (default). Enabling this is a good idea on fields that are frequently used for + terms aggregations. + +<>:: + + Multi-fields allow the same string value to be indexed in multiple ways for + different purposes, such as one field for search and a multi-field for + sorting and aggregations. + +<>:: + + Do not index or analyze any string longer than this value. Defaults to + `2147483647` so that all values are accepted. + +<>:: + + Whether or not the field value should be included in the + <> field? Accepts `true` or `false`. Defaults + to `false` if <> is set to `no`, or if a parent + <> field sets `include_in_all` to `false`. + Otherwise defaults to `true`. + +<>:: + + Should the field be searchable? Accepts `true` (default) or `false`. + +<>:: + + What information should be stored in the index, for scoring purposes. + Defaults to `docs` but can also be set to `freqs` to take term frequency into account + when computing scores. + +<>:: + + Whether field-length should be taken into account when scoring queries. + Accepts `true` or `false` (default). + +<>:: + + Accepts a string value which is substituted for any explicit `null` + values. Defaults to `null`, which means the field is treated as missing.
+ +<>:: + + Whether the field value should be stored and retrievable separately from + the <> field. Accepts `true` or `false` + (default). + +<>:: + + The <> that should be used at search time on + <> fields. Defaults to the `analyzer` setting. + +<>:: + + Which scoring algorithm or _similarity_ should be used. Defaults + to `classic`, which uses TF/IDF. + diff --git a/docs/reference/mapping/types/object.asciidoc b/docs/reference/mapping/types/object.asciidoc index 0d159d7e1ef..6d35e6aee97 100644 --- a/docs/reference/mapping/types/object.asciidoc +++ b/docs/reference/mapping/types/object.asciidoc @@ -46,16 +46,15 @@ PUT my_index "my_type": { <1> "properties": { "region": { - "type": "string", - "index": "not_analyzed" + "type": "keyword" }, "manager": { <2> "properties": { "age": { "type": "integer" }, "name": { <3> "properties": { - "first": { "type": "string" }, - "last": { "type": "string" } + "first": { "type": "text" }, + "last": { "type": "text" } } } } diff --git a/docs/reference/mapping/types/string.asciidoc b/docs/reference/mapping/types/string.asciidoc index 6ff78aa4732..88ca4e1d920 100644 --- a/docs/reference/mapping/types/string.asciidoc +++ b/docs/reference/mapping/types/string.asciidoc @@ -1,179 +1,4 @@ [[string]] === String datatype -Fields of type `string` accept text values. Strings may be sub-divided into: - -Full text:: -+ --- - -Full text values, like the body of an email, are typically used for text based -relevance searches, such as: _Find the most relevant documents that match a -query for "quick brown fox"_. - -These fields are `analyzed`, that is they are passed through an -<> to convert the string into a list of individual terms -before being indexed. The analysis process allows Elasticsearch to search for -individual words _within_ each full text field. Full text fields are not -used for sorting and seldom used for aggregations (although the -<> is a notable exception). - --- - -Keywords:: - -Keywords are exact values like email addresses, hostnames, status codes, or -tags. They are typically used for filtering (_Find me all blog posts where -++status++ is ++published++_), for sorting, and for aggregations. Keyword -fields are `not_analyzed`. Instead, the exact string value is added to the -index as a single term. - -Below is an example of a mapping for a full text (`analyzed`) and a keyword -(`not_analyzed`) string field: - -[source,js] --------------------------------- -PUT my_index -{ - "mappings": { - "my_type": { - "properties": { - "full_name": { <1> - "type": "string" - }, - "status": { - "type": "string", <2> - "index": "not_analyzed" - } - } - } - } -} --------------------------------- -// AUTOSENSE -<1> The `full_name` field is an `analyzed` full text field -- `index:analyzed` is the default. -<2> The `status` field is a `not_analyzed` keyword field. - -Sometimes it is useful to have both a full text (`analyzed`) and a keyword -(`not_analyzed`) version of the same field: one for full text search and the -other for aggregations and sorting. This can be achieved with -<>. - - -[[string-params]] -==== Parameters for string fields - -The following parameters are accepted by `string` fields: - -[horizontal] - -<>:: - - The <> which should be used for - <> string fields, both at index-time and at - search-time (unless overridden by the <>). - Defaults to the default index analyzer, or the - <>. - -<>:: - - Mapping field-level query time boosting. Accepts a floating point number, defaults - to `1.0`. 
- -<>:: - - Should the field be stored on disk in a column-stride fashion, so that it - can later be used for sorting, aggregations, or scripting? Accepts `true` - or `false`. Defaults to `true` for `not_analyzed` fields. Analyzed fields - do not support doc values. - -<>:: - - Can the field use in-memory fielddata for sorting, aggregations, - or scripting? Accepts `disabled` or `paged_bytes` (default). - Not analyzed fields will use <> in preference - to fielddata. - -<>:: - - Multi-fields allow the same string value to be indexed in multiple ways for - different purposes, such as one field for search and a multi-field for - sorting and aggregations, or the same string value analyzed by different - analyzers. - -<>:: - - Do not index or analyze any string longer than this value. Defaults to `0` (disabled). - -<>:: - - Whether or not the field value should be included in the - <> field? Accepts `true` or `false`. Defaults - to `false` if <> is set to `no`, or if a parent - <> field sets `include_in_all` to `false`. - Otherwise defaults to `true`. - -<>:: - - Should the field be searchable? Accepts `analyzed` (default, treat as full-text field), - `not_analyzed` (treat as keyword field) and `no`. - -<>:: - - What information should be stored in the index, for search and highlighting purposes. - Defaults to `positions` for <> fields, and to `docs` for - `not_analyzed` fields. - - -<>:: -+ --- - -Whether field-length should be taken into account when scoring queries. -Defaults depend on the <> setting: - -* `analyzed` fields default to `{ "enabled": true, "loading": "lazy" }`. -* `not_analyzed` fields default to `{ "enabled": false }`. --- - -<>:: - - Accepts a string value which is substituted for any explicit `null` - values. Defaults to `null`, which means the field is treated as missing. - If the field is `analyzed`, the `null_value` will also be analyzed. - -<>:: - - The number of fake term positions which should be inserted between - each element of an array of strings. Defaults to 0. - The number of fake term position which should be inserted between each - element of an array of strings. Defaults to the position_increment_gap - configured on the analyzer which defaults to 100. 100 was chosen because it - prevents phrase queries with reasonably large slops (less than 100) from - matching terms across field values. - -<>:: - - Whether the field value should be stored and retrievable separately from - the <> field. Accepts `true` or `false` - (default). - -<>:: - - The <> that should be used at search time on - <> fields. Defaults to the `analyzer` setting. - -<>:: - - The <> that should be used at search time when a - phrase is encountered. Defaults to the `search_analyzer` setting. - -<>:: - - Which scoring algorithm or _similarity_ should be used. Defaults - to `classic`, which uses TF/IDF. - -<>:: - - Whether term vectors should be stored for an <> - field. Defaults to `no`. +NOTE: The `string` field has been removed in favor of the `text` and `keyword` fields. diff --git a/docs/reference/mapping/types/text.asciidoc b/docs/reference/mapping/types/text.asciidoc new file mode 100644 index 00000000000..7798b2c41f0 --- /dev/null +++ b/docs/reference/mapping/types/text.asciidoc @@ -0,0 +1,139 @@ +[[text]] +=== Text datatype + +A field to index full-text values, such as the body of an email or the +description of a product. These fields are `analyzed`, that is, they are passed through an +<> to convert the string into a list of individual terms +before being indexed.
The analysis process allows Elasticsearch to search for +individual words _within_ each full text field. Text fields are not +used for sorting and seldom used for aggregations (although the +<> +is a notable exception). + +If you need to index structured content such as email addresses, hostnames, status +codes, or tags, you should probably use a <> field instead. + +Below is an example of a mapping for a text field: + +[source,js] +-------------------------------- +PUT my_index +{ + "mappings": { + "my_type": { + "properties": { + "full_name": { + "type": "text" + } + } + } + } +} +-------------------------------- +// AUTOSENSE + +Sometimes it is useful to have both a full text (`text`) and a keyword +(`keyword`) version of the same field: one for full text search and the +other for aggregations and sorting. This can be achieved with +<>. + +[[text-params]] +==== Parameters for text fields + +The following parameters are accepted by `text` fields: + +[horizontal] + +<>:: + + The <> which should be used for + <> string fields, both at index-time and at + search-time (unless overridden by the <>). + Defaults to the default index analyzer, or the + <>. + +<>:: + + Mapping field-level query time boosting. Accepts a floating point number, defaults + to `1.0`. + +<>:: + + Should global ordinals be loaded eagerly on refresh? Accepts `true` or `false` + (default). Enabling this is a good idea on fields that are frequently used for + (significant) terms aggregations. + +<>:: + + Can the field use in-memory fielddata for sorting, aggregations, + or scripting? Accepts `true` or `false` (default). + +<>:: + + Expert settings which allow you to decide which values to load in memory when `fielddata` + is enabled. By default, all values are loaded. + +<>:: + + Multi-fields allow the same string value to be indexed in multiple ways for + different purposes, such as one field for search and a multi-field for + sorting and aggregations, or the same string value analyzed by different + analyzers. + +<>:: + + Whether or not the field value should be included in the + <> field? Accepts `true` or `false`. Defaults + to `false` if <> is set to `no`, or if a parent + <> field sets `include_in_all` to `false`. + Otherwise defaults to `true`. + +<>:: + + Should the field be searchable? Accepts `true` (default) or `false`. + +<>:: + + What information should be stored in the index, for search and highlighting purposes. + Defaults to `positions`. + +<>:: + + Whether field-length should be taken into account when scoring queries. + Accepts `true` (default) or `false`. + +<>:: + + The number of fake term positions which should be inserted between + each element of an array of strings. Defaults to the `position_increment_gap` + configured on the analyzer, which defaults to 100. 100 was chosen because it + prevents phrase queries with reasonably large slops (less than 100) from + matching terms across field values. + +<>:: + + Whether the field value should be stored and retrievable separately from + the <> field. Accepts `true` or `false` + (default). + +<>:: + + The <> that should be used at search time on + <> fields. Defaults to the `analyzer` setting. + +<>:: + + The <> that should be used at search time when a + phrase is encountered. Defaults to the `search_analyzer` setting. + +<>:: + + Which scoring algorithm or _similarity_ should be used. Defaults + to `classic`, which uses TF/IDF.
+ +<>:: + + Whether term vectors should be stored for an <> + field. Defaults to `no`. diff --git a/docs/reference/mapping/types/token-count.asciidoc b/docs/reference/mapping/types/token-count.asciidoc index ca2ed6f0428..baa6d409ead 100644 --- a/docs/reference/mapping/types/token-count.asciidoc +++ b/docs/reference/mapping/types/token-count.asciidoc @@ -15,7 +15,7 @@ PUT my_index "my_type": { "properties": { "name": { <1> - "type": "string", + "type": "text", "fields": { "length": { <2> "type": "token_count", diff --git a/docs/reference/migration/migrate_5_0/java.asciidoc b/docs/reference/migration/migrate_5_0/java.asciidoc index dc60ab58391..59f0b1c15e2 100644 --- a/docs/reference/migration/migrate_5_0/java.asciidoc +++ b/docs/reference/migration/migrate_5_0/java.asciidoc @@ -225,3 +225,58 @@ The `addSuggestion` method now required the user specified suggestion name, prev ===== SuggestionBuilder The `field` setter has been deleted. Instead, the field name needs to be specified as a constructor argument. + +==== SearchSourceBuilder + +All methods which take an `XContentBuilder`, `BytesReference`, `Map`, or `byte[]` have been removed in favor of providing the +relevant builder object for that feature (e.g. `HighlightBuilder`, `AggregationBuilder`, `SuggestBuilder`). This means that all search requests +can now be validated at call time, which results in much clearer errors. + +The `defaultResourceWindowSize(int)` method has been removed. The window size should be set explicitly on all `RescoreBuilder` objects. + +==== SearchRequestBuilder + +All methods which take an `XContentBuilder`, `BytesReference`, `Map`, or `byte[]` have been removed in favor of providing the +relevant builder object for that feature (e.g. `HighlightBuilder`, `AggregationBuilder`, `SuggestBuilder`). This means that all search requests +can now be validated at call time, which results in much clearer errors. + +All highlighter methods have been removed in favor of a single `highlighter(HighlightBuilder)` method. + +The `setExtraSource(SearchSourceBuilder)` method has been removed. + +The `setTemplateSource(String)` and `setTemplateSource(BytesReference)` methods have been removed. Use `setTemplate(Template)` instead. + +`setRescorer(Rescorer)` and `setRescorer(Rescorer, int)` have been removed in favor of `setRescorer(RescoreBuilder)` and `setRescorer(RescoreBuilder, int)`. + +==== SearchRequest + +All `template` methods have been removed in favor of a single `template(Template)` method. + +All `source` methods have been removed in favor of a single `source(SearchSourceBuilder)` method. This means that all search requests can now be validated +at call time, which results in much clearer errors. + +All `extraSource` methods have been removed. + +==== AggregationBuilder + +All methods which take an `XContentBuilder`, `BytesReference`, `Map`, or `byte[]` have been removed in favor of providing the +relevant builder object (i.e. `subAggregation(AggregationBuilder)` or `subAggregation(PipelineAggregationBuilder)`). This means that all +requests can now be validated at call time, which results in much clearer errors.
+ +==== ValidateQueryRequest + +`source(QuerySourceBuilder)`, `source(Map)`, `source(XContentBuilder)`, `source(String)`, `source(byte[])`, `source(byte[], int, int)`, +`source(BytesReference)` and `source()` have been removed in favor of using `query(QueryBuilder)` and `query()`. + +==== ValidateQueryRequestBuilder + +`setSource()` methods have been removed in favor of using `setQuery(QueryBuilder)`. + +==== ExplainRequest + +`source(QuerySourceBuilder)`, `source(Map)`, `source(BytesReference)` and `source()` have been removed in favor of using +`query(QueryBuilder)` and `query()`. + +==== ExplainRequestBuilder + +The `setQuery(BytesReference)` method has been removed in favor of using `setQuery(QueryBuilder)`. diff --git a/docs/reference/migration/migrate_5_0/mapping.asciidoc b/docs/reference/migration/migrate_5_0/mapping.asciidoc index 768a2438d3e..23298cd733c 100644 --- a/docs/reference/migration/migrate_5_0/mapping.asciidoc +++ b/docs/reference/migration/migrate_5_0/mapping.asciidoc @@ -16,6 +16,26 @@ values. For backwards compatibility purposes, during the 5.x series: with `string` fields are no longer possible with `text`/`keyword` fields such as enabling `term_vectors` on a not-analyzed `keyword` field. +==== Default string mappings + +String mappings now have the following default mappings: + +[source,json] +--------------- +{ + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } +} +--------------- + +This allows you to perform full-text search on the original field and to sort +and run aggregations on the `keyword` sub-field. + ==== `index` property On all field datatypes (except for the deprecated `string` field), the `index` @@ -35,12 +55,22 @@ now defaults to using `float` instead of `double`. The reasoning is that floats should be more than enough for most cases but would decrease storage requirements significantly. +==== `norms` + +`norms` now takes a boolean instead of an object. This boolean is the replacement +for `norms.enabled`. There is no replacement for `norms.loading` since eager +loading of norms is not useful anymore now that norms are disk-based. + ==== `fielddata.format` Setting `fielddata.format: doc_values` in the mappings used to implicitly enable doc-values on a field. This no longer works: the only way to enable or disable doc-values is by using the `doc_values` property of mappings. +==== `fielddata.frequency.regex` + +Regex filters are not supported anymore and will be dropped on upgrade. + ==== Source-transform removed The source `transform` feature has been removed. Instead, use an ingest pipeline diff --git a/docs/reference/migration/migrate_5_0/search.asciidoc b/docs/reference/migration/migrate_5_0/search.asciidoc index fad75247a23..595cf2e3fe9 100644 --- a/docs/reference/migration/migrate_5_0/search.asciidoc +++ b/docs/reference/migration/migrate_5_0/search.asciidoc @@ -122,6 +122,8 @@ in favour of `query` and `no_match_query`. upper limit is needed then the `max_children` parameter shouldn't be specified at all. +* The `exists` query will now fail if the `_field_names` field is disabled.
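For reference, the query shape affected by that last point looks like this (a generic sketch with a hypothetical field name, not taken from this change):

[source,js]
--------------------------------------------------
GET /_search
{
  "query": {
    "exists": {
      "field": "user"
    }
  }
}
--------------------------------------------------
// AUTOSENSE

If the index's `_field_names` field has been disabled, this request now fails instead of executing.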
+ ==== Top level `filter` parameter diff --git a/docs/reference/query-dsl/exists-query.asciidoc b/docs/reference/query-dsl/exists-query.asciidoc index 404dce4a4ae..b484d47f4b6 100644 --- a/docs/reference/query-dsl/exists-query.asciidoc +++ b/docs/reference/query-dsl/exists-query.asciidoc @@ -47,7 +47,7 @@ instance, if the `user` field were mapped as follows: [source,js] -------------------------------------------------- "user": { - "type": "string", + "type": "text", "null_value": "_null_" } -------------------------------------------------- diff --git a/docs/reference/query-dsl/mlt-query.asciidoc b/docs/reference/query-dsl/mlt-query.asciidoc index ce2d34144ee..d2d521f9492 100644 --- a/docs/reference/query-dsl/mlt-query.asciidoc +++ b/docs/reference/query-dsl/mlt-query.asciidoc @@ -116,18 +116,18 @@ curl -s -XPUT 'http://localhost:9200/imdb/' -d '{ "movies": { "properties": { "title": { - "type": "string", + "type": "text", "term_vector": "yes" }, "description": { - "type": "string" + "type": "text" }, "tags": { - "type": "string", + "type": "text", "fields" : { "raw": { - "type" : "string", - "index" : "not_analyzed", + "type" : "text", + "analyzer": "keyword", "term_vector" : "yes" } } diff --git a/docs/reference/query-dsl/term-query.asciidoc b/docs/reference/query-dsl/term-query.asciidoc index 85608ca3aa5..801abf65af8 100644 --- a/docs/reference/query-dsl/term-query.asciidoc +++ b/docs/reference/query-dsl/term-query.asciidoc @@ -49,13 +49,13 @@ GET /_search .Why doesn't the `term` query match my document? ************************************************** -String fields can be `analyzed` (treated as full text, like the body of an -email), or `not_analyzed` (treated as exact values, like an email address or a -zip code). Exact values (like numbers, dates, and `not_analyzed` strings) have +String fields can be of type `text` (treated as full text, like the body of an +email), or `keyword` (treated as exact values, like an email address or a +zip code). Exact values (like numbers, dates, and keywords) have the exact value specified in the field added to the inverted index in order to make them searchable. -By default, however, `string` fields are `analyzed`. This means that their +However, `text` fields are `analyzed`. This means that their values are first passed through an <> to produce a list of terms, which are then added to the inverted index. @@ -70,7 +70,7 @@ within a big block of full text. The `term` query looks for the *exact* term in the field's inverted index -- it doesn't know anything about the field's analyzer. This makes it useful for -looking up values in `not_analyzed` string fields, or in numeric or date +looking up values in keyword fields, or in numeric or date fields. When querying full text fields, use the <> instead, which understands how the field has been analyzed. @@ -86,11 +86,10 @@ PUT my_index "my_type": { "properties": { "full_text": { - "type": "string" <1> + "type": "text" <1> }, "exact_value": { - "type": "string", - "index": "not_analyzed" <2> + "type": "keyword" <2> } } } @@ -105,8 +104,8 @@ PUT my_index/my_type/1 -------------------------------------------------- // AUTOSENSE -<1> The `full_text` field is `analyzed` by default. -<2> The `exact_value` field is set to be `not_analyzed`. +<1> The `full_text` field is of type `text` and will be analyzed. +<2> The `exact_value` field is of type `keyword` and will NOT be analyzed. <3> The `full_text` inverted index will contain the terms: [`quick`, `foxes`]. 
<4> The `exact_value` inverted index will contain the exact term: [`Quick Foxes!`]. diff --git a/modules/reindex/src/main/java/org/elasticsearch/index/reindex/AbstractAsyncBulkByScrollAction.java b/modules/reindex/src/main/java/org/elasticsearch/index/reindex/AbstractAsyncBulkByScrollAction.java index 2eb0cc5ba78..a1564aa136a 100644 --- a/modules/reindex/src/main/java/org/elasticsearch/index/reindex/AbstractAsyncBulkByScrollAction.java +++ b/modules/reindex/src/main/java/org/elasticsearch/index/reindex/AbstractAsyncBulkByScrollAction.java @@ -54,6 +54,7 @@ import java.util.Iterator; import java.util.List; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.atomic.AtomicReference; @@ -63,13 +64,14 @@ import static java.util.Collections.emptyList; import static java.util.Collections.unmodifiableList; import static org.elasticsearch.action.bulk.BackoffPolicy.exponentialBackoff; import static org.elasticsearch.common.unit.TimeValue.timeValueNanos; +import static org.elasticsearch.common.unit.TimeValue.timeValueSeconds; import static org.elasticsearch.index.reindex.AbstractBulkByScrollRequest.SIZE_ALL_MATCHES; import static org.elasticsearch.rest.RestStatus.CONFLICT; import static org.elasticsearch.search.sort.SortBuilders.fieldSort; /** * Abstract base for scrolling across a search and executing bulk actions on all - * results. + * results. Methods are package private so that tests can use them. */ public abstract class AbstractAsyncBulkByScrollAction, Response> { /** @@ -81,6 +83,7 @@ public abstract class AbstractAsyncBulkByScrollAction scroll = new AtomicReference<>(); + private final AtomicLong lastBatchStartTime = new AtomicLong(-1); private final Set destinationIndices = Collections.newSetFromMap(new ConcurrentHashMap<>()); private final ESLogger logger; @@ -107,15 +110,10 @@ public abstract class AbstractAsyncBulkByScrollAction indexingFailures, List searchFailures, boolean timedOut); + /** + * Start the action by firing the initial search request.
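+ * Batches after the first are started by {@link #startNextScroll(int)}, which schedules the next scroll request with a delay computed + * from the requests_per_second throttle and the size of the previous batch.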
+ */ public void start() { - initialSearch(); - } - - public BulkByScrollTask getTask() { - return task; - } - - void initialSearch() { if (task.isCancelled()) { finishHim(null); return; @@ -137,7 +135,7 @@ public abstract class AbstractAsyncBulkByScrollAction() { @Override public void onResponse(SearchResponse response) { - onScrollResponse(response); + onScrollResponse(timeValueNanos(max(0, earliestNextBatchStartTime - System.nanoTime())), response); } @Override @@ -308,6 +325,21 @@ public abstract class AbstractAsyncBulkByScrollAction failures) { if (failure.getStatus() == CONFLICT) { task.countVersionConflict(); @@ -318,6 +350,9 @@ public abstract class AbstractAsyncBulkByScrollAction indexingFailures, List searchFailures, boolean timedOut) { if (task.isCancelled() || false == mainRequest.isRefresh()) { finishHim(null, indexingFailures, searchFailures, timedOut); @@ -385,7 +420,7 @@ public abstract class AbstractAsyncBulkByScrollAction, Response extends BulkIndexByScrollResponse, - TA extends TransportAction> extends BaseRestHandler { +public abstract class AbstractBaseReindexRestHandler< + Request extends AbstractBulkByScrollRequest, + Response extends BulkIndexByScrollResponse, + TA extends TransportAction + > extends BaseRestHandler { protected final IndicesQueriesRegistry indicesQueriesRegistry; protected final AggregatorParsers aggParsers; protected final Suggesters suggesters; @@ -59,6 +61,7 @@ public abstract class AbstractBaseReindexRestHandler(channel)); return; diff --git a/modules/reindex/src/main/java/org/elasticsearch/index/reindex/AbstractBulkByScrollRequest.java b/modules/reindex/src/main/java/org/elasticsearch/index/reindex/AbstractBulkByScrollRequest.java index 41b436e6074..29e2acb352e 100644 --- a/modules/reindex/src/main/java/org/elasticsearch/index/reindex/AbstractBulkByScrollRequest.java +++ b/modules/reindex/src/main/java/org/elasticsearch/index/reindex/AbstractBulkByScrollRequest.java @@ -85,6 +85,13 @@ public abstract class AbstractBulkByScrollRequest 0) { + throttledNanos.addAndGet(nanos); + } + } } diff --git a/modules/reindex/src/test/java/org/elasticsearch/index/reindex/AsyncBulkByScrollActionTests.java b/modules/reindex/src/test/java/org/elasticsearch/index/reindex/AsyncBulkByScrollActionTests.java index 5ff471133f8..44201daff9a 100644 --- a/modules/reindex/src/test/java/org/elasticsearch/index/reindex/AsyncBulkByScrollActionTests.java +++ b/modules/reindex/src/test/java/org/elasticsearch/index/reindex/AsyncBulkByScrollActionTests.java @@ -39,6 +39,7 @@ import org.elasticsearch.action.search.ClearScrollRequest; import org.elasticsearch.action.search.ClearScrollResponse; import org.elasticsearch.action.search.SearchRequest; import org.elasticsearch.action.search.SearchResponse; +import org.elasticsearch.action.search.SearchScrollRequest; import org.elasticsearch.action.search.ShardSearchFailure; import org.elasticsearch.action.support.PlainActionFuture; import org.elasticsearch.action.support.replication.ReplicationRequest; @@ -67,12 +68,14 @@ import org.junit.After; import org.junit.Before; import java.util.ArrayList; +import java.util.HashMap; import java.util.Iterator; import java.util.List; +import java.util.Map; import java.util.Set; import java.util.concurrent.CountDownLatch; import java.util.concurrent.ExecutionException; -import java.util.concurrent.Executor; +import java.util.concurrent.ScheduledFuture; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicReference; @@ 
-83,10 +86,15 @@ import static java.util.Collections.emptyMap; import static java.util.Collections.singleton; import static org.apache.lucene.util.TestUtil.randomSimpleString; import static org.elasticsearch.action.bulk.BackoffPolicy.constantBackoff; +import static org.elasticsearch.common.unit.TimeValue.parseTimeValue; import static org.elasticsearch.common.unit.TimeValue.timeValueMillis; +import static org.elasticsearch.common.unit.TimeValue.timeValueSeconds; +import static org.hamcrest.Matchers.closeTo; import static org.hamcrest.Matchers.contains; +import static org.hamcrest.Matchers.either; import static org.hamcrest.Matchers.emptyCollectionOf; import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.greaterThanOrEqualTo; import static org.hamcrest.Matchers.hasSize; import static org.hamcrest.Matchers.instanceOf; @@ -99,17 +107,24 @@ public class AsyncBulkByScrollActionTests extends ESTestCase { private String scrollId; private TaskManager taskManager; private BulkByScrollTask task; + private Map expectedHeaders = new HashMap<>(); @Before public void setupForTest() { client = new MyMockClient(new NoOpClient(getTestName())); threadPool = new ThreadPool(getTestName()); mainRequest = new DummyAbstractBulkByScrollRequest(); - firstSearchRequest = null; + firstSearchRequest = new SearchRequest().scroll(timeValueSeconds(10)); listener = new PlainActionFuture<>(); scrollId = null; taskManager = new TaskManager(Settings.EMPTY); task = (BulkByScrollTask) taskManager.register("don'tcare", "hereeither", mainRequest); + + // Fill the context with something random so we can make sure we inherited it appropriately. + expectedHeaders.clear(); + expectedHeaders.put(randomSimpleString(random()), randomSimpleString(random())); + threadPool.getThreadContext().newStoredContext(); + threadPool.getThreadContext().putHeader(expectedHeaders); } @After @@ -135,34 +150,35 @@ public class AsyncBulkByScrollActionTests extends ESTestCase { long total = randomIntBetween(0, Integer.MAX_VALUE); InternalSearchHits hits = new InternalSearchHits(null, total, 0); InternalSearchResponse searchResponse = new InternalSearchResponse(hits, null, null, null, false, false); - new DummyAbstractAsyncBulkByScrollAction() - .onScrollResponse(new SearchResponse(searchResponse, scrollId(), 5, 4, randomLong(), null)); + new DummyAbstractAsyncBulkByScrollAction().onScrollResponse(timeValueSeconds(0), + new SearchResponse(searchResponse, scrollId(), 5, 4, randomLong(), null)); assertEquals(total, task.getStatus().getTotal()); } - public void testEachScrollResponseIsABatch() { - // Replace the generic thread pool with one that executes immediately so the batch is updated immediately - threadPool.shutdown(); - threadPool = new ThreadPool(getTestName()) { - @Override - public Executor generic() { - return new Executor() { - @Override - public void execute(Runnable command) { - command.run(); - } - }; - } - }; + /** + * Tests that each scroll response is a batch and that the batch is launched properly. 
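+ * The batch count is updated on another thread, so the assertions below wait with assertBusy rather than checking it immediately.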
+ */ + public void testScrollResponseBatchingBehavior() throws Exception { int maxBatches = randomIntBetween(0, 100); for (int batches = 1; batches < maxBatches; batches++) { + long now = System.nanoTime(); InternalSearchHit hit = new InternalSearchHit(0, "id", new Text("type"), emptyMap()); InternalSearchHits hits = new InternalSearchHits(new InternalSearchHit[] { hit }, 0, 0); InternalSearchResponse searchResponse = new InternalSearchResponse(hits, null, null, null, false, false); - new DummyAbstractAsyncBulkByScrollAction() - .onScrollResponse(new SearchResponse(searchResponse, scrollId(), 5, 4, randomLong(), null)); + DummyAbstractAsyncBulkByScrollAction action = new DummyAbstractAsyncBulkByScrollAction(); + action.onScrollResponse(timeValueSeconds(0), + new SearchResponse(searchResponse, scrollId(), 5, 4, randomLong(), null)); - assertEquals(batches, task.getStatus().getBatches()); + // Use assert busy because the update happens on another thread + final int expectedBatches = batches; + assertBusy(() -> assertEquals(expectedBatches, task.getStatus().getBatches())); + + /* + * While we're here we can check that getting a scroll response sets the last scroll start time which makes sure the wait time + * isn't counted as time that the last batch took. + */ + assertThat(action.getLastBatchStartTime(), greaterThanOrEqualTo(now)); + assertEquals(expectedHeaders, client.lastHeaders.get()); } } @@ -220,22 +236,20 @@ public class AsyncBulkByScrollActionTests extends ESTestCase { * Mimics a ThreadPool rejecting execution of the task. */ public void testThreadPoolRejectionsAbortRequest() throws Exception { + TimeValue expectedDelay = parseTimeValue(randomPositiveTimeValue(), "test"); threadPool.shutdown(); threadPool = new ThreadPool(getTestName()) { @Override - public Executor generic() { - return new Executor() { - @Override - public void execute(Runnable command) { - ((AbstractRunnable) command).onRejection(new EsRejectedExecutionException("test")); - } - }; + public ScheduledFuture schedule(TimeValue delay, String name, Runnable command) { + assertEquals(expectedDelay, delay); // While we're here we can check that the sleep made it through + ((AbstractRunnable) command).onRejection(new EsRejectedExecutionException("test")); + return null; + } }; InternalSearchHits hits = new InternalSearchHits(null, 0, 0); InternalSearchResponse searchResponse = new InternalSearchResponse(hits, null, null, null, false, false); new DummyAbstractAsyncBulkByScrollAction() - .onScrollResponse(new SearchResponse(searchResponse, scrollId(), 5, 4, randomLong(), null)); + .onScrollResponse(expectedDelay, new SearchResponse(searchResponse, scrollId(), 5, 4, randomLong(), null)); try { listener.get(); fail("Expected a failure"); @@ -243,6 +257,9 @@ public class AsyncBulkByScrollActionTests extends ESTestCase { assertThat(e.getMessage(), equalTo("EsRejectedExecutionException[test]")); } assertThat(client.scrollsCleared, contains(scrollId)); + + // While we're mocking the threadPool let's also check that we incremented the throttle counter + assertEquals(expectedDelay, task.getStatus().getThrottled()); } /** @@ -252,7 +269,7 @@ public class AsyncBulkByScrollActionTests extends ESTestCase { public void testShardFailuresAbortRequest() throws Exception { ShardSearchFailure shardFailure = new ShardSearchFailure(new RuntimeException("test")); InternalSearchResponse internalResponse = new InternalSearchResponse(null, null, null, null, false, null); - new DummyAbstractAsyncBulkByScrollAction().onScrollResponse( + new 
DummyAbstractAsyncBulkByScrollAction().onScrollResponse(timeValueSeconds(0), new SearchResponse(internalResponse, scrollId(), 5, 4, randomLong(), new ShardSearchFailure[] { shardFailure })); BulkIndexByScrollResponse response = listener.get(); assertThat(response.getIndexingFailures(), emptyCollectionOf(Failure.class)); @@ -267,8 +284,8 @@ public class AsyncBulkByScrollActionTests extends ESTestCase { */ public void testSearchTimeoutsAbortRequest() throws Exception { InternalSearchResponse internalResponse = new InternalSearchResponse(null, null, null, null, true, null); - new DummyAbstractAsyncBulkByScrollAction() - .onScrollResponse(new SearchResponse(internalResponse, scrollId(), 5, 4, randomLong(), new ShardSearchFailure[0])); + new DummyAbstractAsyncBulkByScrollAction().onScrollResponse(timeValueSeconds(0), + new SearchResponse(internalResponse, scrollId(), 5, 4, randomLong(), new ShardSearchFailure[0])); BulkIndexByScrollResponse response = listener.get(); assertThat(response.getIndexingFailures(), emptyCollectionOf(Failure.class)); assertThat(response.getSearchFailures(), emptyCollectionOf(ShardSearchFailure.class)); @@ -304,7 +321,7 @@ public class AsyncBulkByScrollActionTests extends ESTestCase { InternalSearchHit hit = new InternalSearchHit(0, "id", new Text("type"), emptyMap()); InternalSearchHits hits = new InternalSearchHits(new InternalSearchHit[] {hit}, 0, 0); InternalSearchResponse searchResponse = new InternalSearchResponse(hits, null, null, null, false, false); - action.onScrollResponse(new SearchResponse(searchResponse, scrollId(), 5, 4, randomLong(), null)); + action.onScrollResponse(timeValueSeconds(0), new SearchResponse(searchResponse, scrollId(), 5, 4, randomLong(), null)); try { listener.get(); fail("Expected failure."); @@ -334,6 +351,55 @@ public class AsyncBulkByScrollActionTests extends ESTestCase { assertEquals(retryAttempts, task.getStatus().getRetries()); } + public void testPerfectlyThrottledBatchTime() { + DummyAbstractAsyncBulkByScrollAction action = new DummyAbstractAsyncBulkByScrollAction(); + mainRequest.setRequestsPerSecond(0); + assertThat((double) action.perfectlyThrottledBatchTime(randomInt()), closeTo(0f, 0f)); + + int total = between(0, 1000000); + mainRequest.setRequestsPerSecond(1); + assertThat((double) action.perfectlyThrottledBatchTime(total), + closeTo(TimeUnit.SECONDS.toNanos(total), TimeUnit.SECONDS.toNanos(1))); + } + + public void testScrollDelay() throws Exception { + /* + * Replace the thread pool with one that will save the delay sent for the command. We'll use that to check that we used a proper + * delay for throttling. 
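+ * With requests_per_second set to 1 and a last batch of 100 documents the perfectly throttled batch time is about 100 seconds, + * so the scroll keepAlive grows to roughly 110 seconds (on top of the 10 second base) and the captured delay lands near 100 seconds.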
+ */ + AtomicReference capturedDelay = new AtomicReference<>(); + threadPool.shutdown(); + threadPool = new ThreadPool(getTestName()) { + @Override + public ScheduledFuture schedule(TimeValue delay, String name, Runnable command) { + capturedDelay.set(delay); + return null; + } + }; + + DummyAbstractAsyncBulkByScrollAction action = new DummyAbstractAsyncBulkByScrollAction(); + action.setScroll(scrollId()); + + // We'd like to get about 1 request a second + mainRequest.setRequestsPerSecond(1f); + // Make the last scroll look nearly instant + action.setLastBatchStartTime(System.nanoTime()); + // The last batch had 100 documents + action.startNextScroll(100); + + // So the next request is going to have to wait an extra 100 seconds or so (base was 10, so 110ish) + assertThat(client.lastScroll.get().request.scroll().keepAlive().seconds(), either(equalTo(110L)).or(equalTo(109L))); + + // Now we can simulate a response and check the delay that we used for the task + InternalSearchHit hit = new InternalSearchHit(0, "id", new Text("type"), emptyMap()); + InternalSearchHits hits = new InternalSearchHits(new InternalSearchHit[] { hit }, 0, 0); + InternalSearchResponse searchResponse = new InternalSearchResponse(hits, null, null, null, false, false); + client.lastScroll.get().listener.onResponse(new SearchResponse(searchResponse, scrollId(), 5, 4, randomLong(), null)); + + // The delay is still 100ish seconds because there hasn't been much time between when we requested the bulk and when we got it. + assertThat(capturedDelay.get().seconds(), either(equalTo(100L)).or(equalTo(99L))); + } + private long retryTestCase(boolean failWithRejection) throws Exception { int totalFailures = randomIntBetween(1, mainRequest.getMaxRetries()); int size = randomIntBetween(1, 100); @@ -353,7 +419,7 @@ public class AsyncBulkByScrollActionTests extends ESTestCase { } @Override - void startNextScroll() { + void startNextScroll(int lastBatchSize) { successLatch.countDown(); } }; @@ -418,12 +484,12 @@ public class AsyncBulkByScrollActionTests extends ESTestCase { } public void testCancelBeforeInitialSearch() throws Exception { - cancelTaskCase((DummyAbstractAsyncBulkByScrollAction action) -> action.initialSearch()); + cancelTaskCase((DummyAbstractAsyncBulkByScrollAction action) -> action.start()); } public void testCancelBeforeScrollResponse() throws Exception { // We bail so early we don't need to pass in a half way valid response. - cancelTaskCase((DummyAbstractAsyncBulkByScrollAction action) -> action.onScrollResponse(null)); + cancelTaskCase((DummyAbstractAsyncBulkByScrollAction action) -> action.onScrollResponse(timeValueSeconds(0), null)); } public void testCancelBeforeSendBulkRequest() throws Exception { @@ -437,7 +503,7 @@ public class AsyncBulkByScrollActionTests extends ESTestCase { } public void testCancelBeforeStartNextScroll() throws Exception { - cancelTaskCase((DummyAbstractAsyncBulkByScrollAction action) -> action.startNextScroll()); + cancelTaskCase((DummyAbstractAsyncBulkByScrollAction action) -> action.startNextScroll(0)); } public void testCancelBeforeStartNormalTermination() throws Exception { @@ -447,6 +513,46 @@ public class AsyncBulkByScrollActionTests extends ESTestCase { assertNull("No refresh was attempted", client.lastRefreshRequest.get()); } + /** + * Tests that we can cancel the request during its throttling delay. This can't use {@link #cancelTaskCase(Consumer)} because it needs + * to send the request un-canceled and cancel it at a specific time. 
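+ * The cancellation is injected through an overridden ThreadPool#schedule, which is the same scheduling path the throttling delay takes.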
+ */ + public void testCancelWhileDelayedAfterScrollResponse() throws Exception { + String reason = randomSimpleString(random()); + + /* + * Replace the thread pool with one that will cancel the task as soon as anything is scheduled, which reindex tries to do when there + * is a delay. + */ + threadPool.shutdown(); + threadPool = new ThreadPool(getTestName()) { + @Override + public ScheduledFuture schedule(TimeValue delay, String name, Runnable command) { + taskManager.cancel(task, reason, (Set s) -> {}); + command.run(); + return null; + } + }; + + // Send the scroll response which will trigger the custom thread pool above, canceling the request before running the response + DummyAbstractAsyncBulkByScrollAction action = new DummyAbstractAsyncBulkByScrollAction(); + boolean previousScrollSet = usually(); + if (previousScrollSet) { + action.setScroll(scrollId()); + } + long total = randomIntBetween(0, Integer.MAX_VALUE); + InternalSearchHits hits = new InternalSearchHits(null, total, 0); + InternalSearchResponse searchResponse = new InternalSearchResponse(hits, null, null, null, false, false); + action.onScrollResponse(timeValueSeconds(0), new SearchResponse(searchResponse, scrollId(), 5, 4, randomLong(), null)); + + // Now that we've got our cancel we'll just verify that it all came through all right + assertEquals(reason, listener.get().getReasonCancelled()); + if (previousScrollSet) { + // Canceled tasks always start to clear the scroll before they die. + assertThat(client.scrollsCleared, contains(scrollId)); + } + } + private void cancelTaskCase(Consumer testMe) throws Exception { DummyAbstractAsyncBulkByScrollAction action = new DummyAbstractAsyncBulkByScrollAction(); boolean previousScrollSet = usually(); @@ -489,10 +595,12 @@ public class AsyncBulkByScrollActionTests extends ESTestCase { } } - private static class MyMockClient extends FilterClient { + private class MyMockClient extends FilterClient { private final List scrollsCleared = new ArrayList<>(); private final AtomicInteger bulksAttempts = new AtomicInteger(); + private final AtomicReference> lastHeaders = new AtomicReference<>(); private final AtomicReference lastRefreshRequest = new AtomicReference<>(); + private final AtomicReference> lastScroll = new AtomicReference<>(); private int bulksToReject = 0; @@ -505,11 +613,16 @@ public class AsyncBulkByScrollActionTests extends ESTestCase { protected , Response extends ActionResponse, RequestBuilder extends ActionRequestBuilder> void doExecute( Action action, Request request, ActionListener listener) { + lastHeaders.set(threadPool.getThreadContext().getHeaders()); if (request instanceof RefreshRequest) { lastRefreshRequest.set((RefreshRequest) request); listener.onResponse(null); return; } + if (request instanceof SearchScrollRequest) { + lastScroll.set(new RequestAndListener<>((SearchScrollRequest) request, (ActionListener) listener)); + return; + } if (request instanceof ClearScrollRequest) { ClearScrollRequest clearScroll = (ClearScrollRequest) request; scrollsCleared.addAll(clearScroll.getScrollIds()); @@ -561,4 +674,14 @@ public class AsyncBulkByScrollActionTests extends ESTestCase { super.doExecute(action, request, listener); } } + + private static class RequestAndListener, Response> { + private final Request request; + private final ActionListener listener; + + public RequestAndListener(Request request, ActionListener listener) { + this.request = request; + this.listener = listener; + } + } } diff --git 
a/modules/reindex/src/test/java/org/elasticsearch/index/reindex/BulkByScrollTaskTests.java b/modules/reindex/src/test/java/org/elasticsearch/index/reindex/BulkByScrollTaskTests.java index 81a3a2cc706..442943be21f 100644 --- a/modules/reindex/src/test/java/org/elasticsearch/index/reindex/BulkByScrollTaskTests.java +++ b/modules/reindex/src/test/java/org/elasticsearch/index/reindex/BulkByScrollTaskTests.java @@ -19,9 +19,12 @@ package org.elasticsearch.index.reindex; +import org.elasticsearch.common.unit.TimeValue; import org.elasticsearch.test.ESTestCase; import org.junit.Before; +import static org.elasticsearch.common.unit.TimeValue.parseTimeValue; + public class BulkByScrollTaskTests extends ESTestCase { private BulkByScrollTask task; @@ -101,13 +104,14 @@ public class BulkByScrollTaskTests extends ESTestCase { } public void testStatusHatesNegatives() { - expectThrows(IllegalArgumentException.class, () -> new BulkByScrollTask.Status(-1, 0, 0, 0, 0, 0, 0, 0, null)); - expectThrows(IllegalArgumentException.class, () -> new BulkByScrollTask.Status(0, -1, 0, 0, 0, 0, 0, 0, null)); - expectThrows(IllegalArgumentException.class, () -> new BulkByScrollTask.Status(0, 0, -1, 0, 0, 0, 0, 0, null)); - expectThrows(IllegalArgumentException.class, () -> new BulkByScrollTask.Status(0, 0, 0, -1, 0, 0, 0, 0, null)); - expectThrows(IllegalArgumentException.class, () -> new BulkByScrollTask.Status(0, 0, 0, 0, -1, 0, 0, 0, null)); - expectThrows(IllegalArgumentException.class, () -> new BulkByScrollTask.Status(0, 0, 0, 0, 0, -1, 0, 0, null)); - expectThrows(IllegalArgumentException.class, () -> new BulkByScrollTask.Status(0, 0, 0, 0, 0, 0, -1, 0, null)); - expectThrows(IllegalArgumentException.class, () -> new BulkByScrollTask.Status(0, 0, 0, 0, 0, 0, 0, -1, null)); + TimeValue throttle = parseTimeValue(randomPositiveTimeValue(), "test"); + expectThrows(IllegalArgumentException.class, () -> new BulkByScrollTask.Status(-1, 0, 0, 0, 0, 0, 0, 0, throttle, null)); + expectThrows(IllegalArgumentException.class, () -> new BulkByScrollTask.Status(0, -1, 0, 0, 0, 0, 0, 0, throttle, null)); + expectThrows(IllegalArgumentException.class, () -> new BulkByScrollTask.Status(0, 0, -1, 0, 0, 0, 0, 0, throttle, null)); + expectThrows(IllegalArgumentException.class, () -> new BulkByScrollTask.Status(0, 0, 0, -1, 0, 0, 0, 0, throttle, null)); + expectThrows(IllegalArgumentException.class, () -> new BulkByScrollTask.Status(0, 0, 0, 0, -1, 0, 0, 0, throttle, null)); + expectThrows(IllegalArgumentException.class, () -> new BulkByScrollTask.Status(0, 0, 0, 0, 0, -1, 0, 0, throttle, null)); + expectThrows(IllegalArgumentException.class, () -> new BulkByScrollTask.Status(0, 0, 0, 0, 0, 0, -1, 0, throttle, null)); + expectThrows(IllegalArgumentException.class, () -> new BulkByScrollTask.Status(0, 0, 0, 0, 0, 0, 0, -1, throttle, null)); } } diff --git a/modules/reindex/src/test/java/org/elasticsearch/index/reindex/RoundTripTests.java b/modules/reindex/src/test/java/org/elasticsearch/index/reindex/RoundTripTests.java index 6e1cbb59e86..cfa763e5dba 100644 --- a/modules/reindex/src/test/java/org/elasticsearch/index/reindex/RoundTripTests.java +++ b/modules/reindex/src/test/java/org/elasticsearch/index/reindex/RoundTripTests.java @@ -42,6 +42,7 @@ import static java.util.Collections.emptyList; import static java.util.Collections.emptyMap; import static java.util.Collections.singletonList; import static org.apache.lucene.util.TestUtil.randomSimpleString; +import static org.elasticsearch.common.unit.TimeValue.parseTimeValue; import static 
org.elasticsearch.common.unit.TimeValue.timeValueMillis; /** @@ -77,6 +78,7 @@ public class RoundTripTests extends ESTestCase { request.setTimeout(TimeValue.parseTimeValue(randomTimeValue(), null, "test")); request.setConsistency(randomFrom(WriteConsistencyLevel.values())); request.setScript(random().nextBoolean() ? null : randomScript()); + request.setRequestsPerSecond(between(0, Integer.MAX_VALUE)); } private void assertRequestEquals(AbstractBulkIndexByScrollRequest request, @@ -90,6 +92,7 @@ public class RoundTripTests extends ESTestCase { assertEquals(request.getScript(), tripped.getScript()); assertEquals(request.getRetryBackoffInitialTime(), tripped.getRetryBackoffInitialTime()); assertEquals(request.getMaxRetries(), tripped.getMaxRetries()); + assertEquals(request.getRequestsPerSecond(), tripped.getRequestsPerSecond(), 0d); } public void testBulkByTaskStatus() throws IOException { @@ -119,7 +122,7 @@ public class RoundTripTests extends ESTestCase { private BulkByScrollTask.Status randomStatus() { return new BulkByScrollTask.Status(randomPositiveLong(), randomPositiveLong(), randomPositiveLong(), randomPositiveLong(), randomPositiveInt(), randomPositiveLong(), randomPositiveLong(), randomPositiveLong(), - random().nextBoolean() ? null : randomSimpleString(random())); + parseTimeValue(randomPositiveTimeValue(), "test"), random().nextBoolean() ? null : randomSimpleString(random())); } private List randomIndexingFailures() { @@ -194,5 +197,7 @@ public class RoundTripTests extends ESTestCase { assertEquals(expected.getVersionConflicts(), actual.getVersionConflicts()); assertEquals(expected.getNoops(), actual.getNoops()); assertEquals(expected.getRetries(), actual.getRetries()); + assertEquals(expected.getThrottled(), actual.getThrottled()); + assertEquals(expected.getReasonCancelled(), actual.getReasonCancelled()); } } diff --git a/modules/reindex/src/test/resources/rest-api-spec/test/reindex/10_basic.yaml b/modules/reindex/src/test/resources/rest-api-spec/test/reindex/10_basic.yaml index 31e97967af0..413c8d1c143 100644 --- a/modules/reindex/src/test/resources/rest-api-spec/test/reindex/10_basic.yaml +++ b/modules/reindex/src/test/resources/rest-api-spec/test/reindex/10_basic.yaml @@ -21,6 +21,7 @@ - match: {version_conflicts: 0} - match: {batches: 1} - match: {failures: []} + - match: {throttled_millis: 0} - is_true: took - is_false: task @@ -53,6 +54,7 @@ - match: {version_conflicts: 0} - match: {batches: 1} - match: {failures: []} + - match: {throttled_millis: 0} - is_true: took - is_false: task @@ -84,6 +86,7 @@ - is_false: failures - is_false: noops - is_false: took + - is_false: throttled_millis - is_false: created - do: @@ -163,6 +166,7 @@ - match: {version_conflicts: 1} - match: {batches: 1} - match: {failures: []} + - match: {throttled_millis: 0} - is_true: took --- diff --git a/modules/reindex/src/test/resources/rest-api-spec/test/reindex/80_throttle.yaml b/modules/reindex/src/test/resources/rest-api-spec/test/reindex/80_throttle.yaml new file mode 100644 index 00000000000..2543670d5e6 --- /dev/null +++ b/modules/reindex/src/test/resources/rest-api-spec/test/reindex/80_throttle.yaml @@ -0,0 +1,53 @@ +--- +"Throttle the request": + # Throttling happens between each scroll batch so we need to control the size of the batch by using a single shard + # and a small batch size on the request + - do: + indices.create: + index: source + body: + settings: + number_of_shards: "1" + number_of_replicas: "0" + - do: + cluster.health: + wait_for_status: yellow + - do: + index: + index: source 
+ type: foo + id: 1 + body: { "text": "test" } + - do: + index: + index: source + type: foo + id: 2 + body: { "text": "test" } + - do: + index: + index: source + type: foo + id: 3 + body: { "text": "test" } + - do: + indices.refresh: {} + + - do: + reindex: + requests_per_second: 1 + body: + source: + index: source + size: 1 + dest: + index: dest + - match: {created: 3} + - match: {updated: 0} + - match: {version_conflicts: 0} + - match: {batches: 3} + - match: {failures: []} + - gt: {throttled_millis: 1000} + - lt: {throttled_millis: 4000} + - is_true: took + - is_false: task diff --git a/modules/reindex/src/test/resources/rest-api-spec/test/update-by-query/10_basic.yaml b/modules/reindex/src/test/resources/rest-api-spec/test/update-by-query/10_basic.yaml index 65db8a5e66f..bf54ac5584f 100644 --- a/modules/reindex/src/test/resources/rest-api-spec/test/update-by-query/10_basic.yaml +++ b/modules/reindex/src/test/resources/rest-api-spec/test/update-by-query/10_basic.yaml @@ -18,6 +18,7 @@ - match: {batches: 1} - match: {failures: []} - match: {noops: 0} + - match: {throttled_millis: 0} - is_true: took - is_false: created # Update by query can't create - is_false: task @@ -45,6 +46,7 @@ - is_false: failures - is_false: noops - is_false: took + - is_false: throttled_millis - is_false: created - do: @@ -125,6 +127,7 @@ - match: {batches: 1} - match: {noops: 0} - match: {failures: []} + - match: {throttled_millis: 0} - is_true: took --- @@ -182,6 +185,7 @@ - match: {version_conflicts: 0} - match: {batches: 1} - match: {failures: []} + - match: {throttled_millis: 0} - is_true: took --- diff --git a/modules/reindex/src/test/resources/rest-api-spec/test/update-by-query/70_throttle.yaml b/modules/reindex/src/test/resources/rest-api-spec/test/update-by-query/70_throttle.yaml new file mode 100644 index 00000000000..f0e75b8b2d5 --- /dev/null +++ b/modules/reindex/src/test/resources/rest-api-spec/test/update-by-query/70_throttle.yaml @@ -0,0 +1,39 @@ +"Throttle the request": + # Throttling happens between each scroll batch so we need to control the size of the batch by using a single shard + # and a small batch size on the request + - do: + indices.create: + index: test + body: + settings: + number_of_shards: 1 + - do: + cluster.health: + wait_for_status: yellow + - do: + index: + index: test + type: foo + body: { "text": "test" } + - do: + index: + index: test + type: foo + body: { "text": "test" } + - do: + index: + index: test + type: foo + body: { "text": "test" } + - do: + indices.refresh: {} + + - do: + update-by-query: + index: test + scroll_size: 1 + requests_per_second: 1 + - match: {batches: 3} + - match: {updated: 3} + - gt: {throttled_millis: 1000} + - lt: {throttled_millis: 4000} diff --git a/plugins/analysis-kuromoji/src/main/java/org/elasticsearch/index/analysis/KuromojiNumberFilterFactory.java b/plugins/analysis-kuromoji/src/main/java/org/elasticsearch/index/analysis/KuromojiNumberFilterFactory.java new file mode 100644 index 00000000000..cb6b478957a --- /dev/null +++ b/plugins/analysis-kuromoji/src/main/java/org/elasticsearch/index/analysis/KuromojiNumberFilterFactory.java @@ -0,0 +1,37 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.elasticsearch.index.analysis; + +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.ja.JapaneseNumberFilter; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.env.Environment; +import org.elasticsearch.index.IndexSettings; + +public class KuromojiNumberFilterFactory extends AbstractTokenFilterFactory { + + public KuromojiNumberFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) { + super(indexSettings, name, settings); + } + + @Override + public TokenStream create(TokenStream tokenStream) { + return new JapaneseNumberFilter(tokenStream); + } +} diff --git a/plugins/analysis-kuromoji/src/main/java/org/elasticsearch/index/analysis/KuromojiTokenizerFactory.java b/plugins/analysis-kuromoji/src/main/java/org/elasticsearch/index/analysis/KuromojiTokenizerFactory.java index 87e08c757b4..9e41621525a 100644 --- a/plugins/analysis-kuromoji/src/main/java/org/elasticsearch/index/analysis/KuromojiTokenizerFactory.java +++ b/plugins/analysis-kuromoji/src/main/java/org/elasticsearch/index/analysis/KuromojiTokenizerFactory.java @@ -36,9 +36,13 @@ import java.io.Reader; public class KuromojiTokenizerFactory extends AbstractTokenizerFactory { private static final String USER_DICT_OPTION = "user_dictionary"; + private static final String NBEST_COST = "nbest_cost"; + private static final String NBEST_EXAMPLES = "nbest_examples"; private final UserDictionary userDictionary; private final Mode mode; + private final String nBestExamples; + private final int nBestCost; private boolean discartPunctuation; @@ -47,6 +51,8 @@ public class KuromojiTokenizerFactory extends AbstractTokenizerFactory { mode = getMode(settings); userDictionary = getUserDictionary(env, settings); discartPunctuation = settings.getAsBoolean("discard_punctuation", true); + nBestCost = settings.getAsInt(NBEST_COST, -1); + nBestExamples = settings.get(NBEST_EXAMPLES); } public static UserDictionary getUserDictionary(Environment env, Settings settings) { @@ -83,7 +89,13 @@ public class KuromojiTokenizerFactory extends AbstractTokenizerFactory { @Override public Tokenizer create() { - return new JapaneseTokenizer(userDictionary, discartPunctuation, mode); + JapaneseTokenizer t = new JapaneseTokenizer(userDictionary, discartPunctuation, mode); + int nBestCost = this.nBestCost; + if (nBestExamples != null) { + nBestCost = Math.max(nBestCost, t.calcNBestCost(nBestExamples)); + } + t.setNBestCost(nBestCost); + return t; } } diff --git a/plugins/analysis-kuromoji/src/main/java/org/elasticsearch/plugin/analysis/kuromoji/AnalysisKuromojiPlugin.java b/plugins/analysis-kuromoji/src/main/java/org/elasticsearch/plugin/analysis/kuromoji/AnalysisKuromojiPlugin.java index 6c0a15f2e39..4208b1be504 100644 --- a/plugins/analysis-kuromoji/src/main/java/org/elasticsearch/plugin/analysis/kuromoji/AnalysisKuromojiPlugin.java +++ b/plugins/analysis-kuromoji/src/main/java/org/elasticsearch/plugin/analysis/kuromoji/AnalysisKuromojiPlugin.java @@ -24,6 +24,7 @@ import org.elasticsearch.index.analysis.KuromojiAnalyzerProvider; 
import org.elasticsearch.index.analysis.KuromojiBaseFormFilterFactory; import org.elasticsearch.index.analysis.KuromojiIterationMarkCharFilterFactory; import org.elasticsearch.index.analysis.KuromojiKatakanaStemmerFactory; +import org.elasticsearch.index.analysis.KuromojiNumberFilterFactory; import org.elasticsearch.index.analysis.KuromojiPartOfSpeechFilterFactory; import org.elasticsearch.index.analysis.KuromojiReadingFormFilterFactory; import org.elasticsearch.index.analysis.KuromojiTokenizerFactory; @@ -55,5 +56,6 @@ public class AnalysisKuromojiPlugin extends Plugin { module.registerTokenFilter("kuromoji_readingform", KuromojiReadingFormFilterFactory::new); module.registerTokenFilter("kuromoji_stemmer", KuromojiKatakanaStemmerFactory::new); module.registerTokenFilter("ja_stop", JapaneseStopTokenFilterFactory::new); + module.registerTokenFilter("kuromoji_number", KuromojiNumberFilterFactory::new); } } diff --git a/plugins/analysis-kuromoji/src/test/java/org/elasticsearch/index/analysis/KuromojiAnalysisTests.java b/plugins/analysis-kuromoji/src/test/java/org/elasticsearch/index/analysis/KuromojiAnalysisTests.java index b81de20d73d..93aeddeee5f 100644 --- a/plugins/analysis-kuromoji/src/test/java/org/elasticsearch/index/analysis/KuromojiAnalysisTests.java +++ b/plugins/analysis-kuromoji/src/test/java/org/elasticsearch/index/analysis/KuromojiAnalysisTests.java @@ -26,18 +26,11 @@ import org.apache.lucene.analysis.ja.JapaneseTokenizer; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.elasticsearch.Version; import org.elasticsearch.cluster.metadata.IndexMetaData; -import org.elasticsearch.common.inject.Injector; -import org.elasticsearch.common.inject.ModulesBuilder; import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.common.settings.SettingsModule; import org.elasticsearch.env.Environment; -import org.elasticsearch.env.EnvironmentModule; import org.elasticsearch.index.Index; -import org.elasticsearch.indices.analysis.AnalysisModule; import org.elasticsearch.plugin.analysis.kuromoji.AnalysisKuromojiPlugin; import org.elasticsearch.test.ESTestCase; -import org.elasticsearch.test.IndexSettingsModule; -import org.elasticsearch.test.InternalSettingsPlugin; import java.io.IOException; import java.io.InputStream; @@ -75,6 +68,9 @@ public class KuromojiAnalysisTests extends ESTestCase { filterFactory = analysisService.tokenFilter("ja_stop"); assertThat(filterFactory, instanceOf(JapaneseStopTokenFilterFactory.class)); + filterFactory = analysisService.tokenFilter("kuromoji_number"); + assertThat(filterFactory, instanceOf(KuromojiNumberFilterFactory.class)); + NamedAnalyzer analyzer = analysisService.analyzer("kuromoji"); assertThat(analyzer.analyzer(), instanceOf(JapaneseAnalyzer.class)); @@ -187,34 +183,22 @@ public class KuromojiAnalysisTests extends ESTestCase { assertSimpleTSOutput(tokenFilter.create(tokenizer), expected); } - public AnalysisService createAnalysisService() throws IOException { - InputStream empty_dict = getClass().getResourceAsStream("empty_user_dict.txt"); - InputStream dict = getClass().getResourceAsStream("user_dict.txt"); + private static AnalysisService createAnalysisService() throws IOException { + InputStream empty_dict = KuromojiAnalysisTests.class.getResourceAsStream("empty_user_dict.txt"); + InputStream dict = KuromojiAnalysisTests.class.getResourceAsStream("user_dict.txt"); Path home = createTempDir(); Path config = home.resolve("config"); Files.createDirectory(config); Files.copy(empty_dict, 
config.resolve("empty_user_dict.txt")); Files.copy(dict, config.resolve("user_dict.txt")); - String json = "/org/elasticsearch/index/analysis/kuromoji_analysis.json"; + Settings settings = Settings.settingsBuilder() - .loadFromStream(json, getClass().getResourceAsStream(json)) - .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT) - .build(); - Settings nodeSettings = Settings.settingsBuilder() - .put(Environment.PATH_HOME_SETTING.getKey(), home).build(); - final SettingsModule settingsModule = new SettingsModule(nodeSettings); - settingsModule.registerSetting(InternalSettingsPlugin.VERSION_CREATED); - Index index = new Index("test", "_na_"); - - Environment environment = new Environment(nodeSettings); - AnalysisModule analysisModule = new AnalysisModule(environment); - new AnalysisKuromojiPlugin().onModule(analysisModule); - Injector parentInjector = new ModulesBuilder().add(settingsModule, - new EnvironmentModule(environment), analysisModule) - .createInjector(); - - return parentInjector.getInstance(AnalysisRegistry.class).build(IndexSettingsModule.newIndexSettings(index, settings)); + .loadFromStream(json, KuromojiAnalysisTests.class.getResourceAsStream(json)) + .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT) + .build(); + Settings nodeSettings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), home).build(); + return createAnalysisService(new Index("test", "_na_"), nodeSettings, settings, new AnalysisKuromojiPlugin()::onModule); } public static void assertSimpleTSOutput(TokenStream stream, @@ -262,4 +246,49 @@ public class KuromojiAnalysisTests extends ESTestCase { TokenizerFactory tokenizerFactory = analysisService.tokenizer("kuromoji_empty_user_dict"); assertThat(tokenizerFactory, instanceOf(KuromojiTokenizerFactory.class)); } + + public void testNbestCost() throws IOException { + AnalysisService analysisService = createAnalysisService(); + TokenizerFactory tokenizerFactory = analysisService.tokenizer("kuromoji_nbest_cost"); + String source = "鳩山積み"; + String[] expected = new String[] {"鳩", "鳩山", "山積み", "積み"}; + + Tokenizer tokenizer = tokenizerFactory.create(); + tokenizer.setReader(new StringReader(source)); + assertSimpleTSOutput(tokenizer, expected); + } + + public void testNbestExample() throws IOException { + AnalysisService analysisService = createAnalysisService(); + TokenizerFactory tokenizerFactory = analysisService.tokenizer("kuromoji_nbest_examples"); + String source = "鳩山積み"; + String[] expected = new String[] {"鳩", "鳩山", "山積み", "積み"}; + + Tokenizer tokenizer = tokenizerFactory.create(); + tokenizer.setReader(new StringReader(source)); + assertSimpleTSOutput(tokenizer, expected); + } + + public void testNbestBothOptions() throws IOException { + AnalysisService analysisService = createAnalysisService(); + TokenizerFactory tokenizerFactory = analysisService.tokenizer("kuromoji_nbest_both"); + String source = "鳩山積み"; + String[] expected = new String[] {"鳩", "鳩山", "山積み", "積み"}; + + Tokenizer tokenizer = tokenizerFactory.create(); + tokenizer.setReader(new StringReader(source)); + assertSimpleTSOutput(tokenizer, expected); + + } + + public void testNumberFilterFactory() throws Exception { + AnalysisService analysisService = createAnalysisService(); + TokenFilterFactory tokenFilter = analysisService.tokenFilter("kuromoji_number"); + assertThat(tokenFilter, instanceOf(KuromojiNumberFilterFactory.class)); + String source = "本日十万二千五百円のワインを買った"; + String[] expected = new String[]{"本日", "102500", "円", "の", "ワイン", "を", "買っ", "た"}; + Tokenizer 
tokenizer = new JapaneseTokenizer(null, true, JapaneseTokenizer.Mode.SEARCH); + tokenizer.setReader(new StringReader(source)); + assertSimpleTSOutput(tokenFilter.create(tokenizer), expected); + } } diff --git a/plugins/analysis-kuromoji/src/test/resources/org/elasticsearch/index/analysis/kuromoji_analysis.json b/plugins/analysis-kuromoji/src/test/resources/org/elasticsearch/index/analysis/kuromoji_analysis.json index 58ed015b850..d0f94a2117b 100644 --- a/plugins/analysis-kuromoji/src/test/resources/org/elasticsearch/index/analysis/kuromoji_analysis.json +++ b/plugins/analysis-kuromoji/src/test/resources/org/elasticsearch/index/analysis/kuromoji_analysis.json @@ -18,7 +18,6 @@ "type": "ja_stop", "stopwords": ["_japanese_", "スピード"] } - }, "char_filter":{ @@ -48,6 +47,19 @@ "kuromoji_user_dict" : { "type":"kuromoji_tokenizer", "user_dictionary":"user_dict.txt" + }, + "kuromoji_nbest_cost" : { + "type": "kuromoji_tokenizer", + "nbest_cost" : "2000" + }, + "kuromoji_nbest_examples" : { + "type": "kuromoji_tokenizer", + "nbest_examples" : "/鳩山積み-鳩山/鳩山積み-鳩/" + }, + "kuromoji_nbest_both" : { + "type": "kuromoji_tokenizer", + "nbest_examples" : "/鳩山積み-鳩山/鳩山積み-鳩/", + "nbest_cost" : "1000" } }, "analyzer" : { diff --git a/qa/evil-tests/src/test/java/org/elasticsearch/bootstrap/EvilJNANativesTests.java b/qa/evil-tests/src/test/java/org/elasticsearch/bootstrap/EvilJNANativesTests.java index 080eee2501b..1ef5bdfcfae 100644 --- a/qa/evil-tests/src/test/java/org/elasticsearch/bootstrap/EvilJNANativesTests.java +++ b/qa/evil-tests/src/test/java/org/elasticsearch/bootstrap/EvilJNANativesTests.java @@ -27,7 +27,9 @@ import java.io.IOException; import java.nio.file.Files; import java.util.List; +import static org.hamcrest.Matchers.anyOf; import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.greaterThanOrEqualTo; public class EvilJNANativesTests extends ESTestCase { @@ -49,4 +51,26 @@ public class EvilJNANativesTests extends ESTestCase { assertThat(JNANatives.MAX_NUMBER_OF_THREADS, equalTo(-1L)); } } + + public void testSetMaxSizeVirtualMemory() throws IOException { + if (Constants.LINUX) { + final List lines = Files.readAllLines(PathUtils.get("/proc/self/limits")); + if (!lines.isEmpty()) { + for (String line : lines) { + if (line != null && line.startsWith("Max address space")) { + final String[] fields = line.split("\\s+"); + final String limit = fields[3]; + assertEquals(JNANatives.rlimitToString(JNANatives.MAX_SIZE_VIRTUAL_MEMORY), limit); + return; + } + } + } + fail("should have read max size virtual memory from /proc/self/limits"); + } else if (Constants.MAC_OS_X) { + assertThat(JNANatives.MAX_SIZE_VIRTUAL_MEMORY, anyOf(equalTo(Long.MIN_VALUE), greaterThanOrEqualTo(0L))); + } else { + assertThat(JNANatives.MAX_SIZE_VIRTUAL_MEMORY, equalTo(Long.MIN_VALUE)); + } + } + } diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/reindex.json b/rest-api-spec/src/main/resources/rest-api-spec/api/reindex.json index f09efef7c91..bfe12c981dc 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/api/reindex.json +++ b/rest-api-spec/src/main/resources/rest-api-spec/api/reindex.json @@ -22,9 +22,14 @@ "description" : "Explicit write consistency setting for the operation" }, "wait_for_completion": { - "type" : "boolean", - "default": false, - "description" : "Should the request should block until the reindex is complete." + "type" : "boolean", + "default": false, + "description" : "Should the request block until the reindex is complete." 
+ }, + "requests_per_second": { + "type": "float", + "default": 0, + "description": "The throttle for this request in sub-requests per second. 0 means set no throttle." } } }, diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/update-by-query.json b/rest-api-spec/src/main/resources/rest-api-spec/api/update-by-query.json index dca49cbcc6a..fe7fdf8a840 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/api/update-by-query.json +++ b/rest-api-spec/src/main/resources/rest-api-spec/api/update-by-query.json @@ -198,6 +198,11 @@ "type" : "boolean", "default": false, "description" : "Should the request should block until the reindex is complete." + }, + "requests_per_second": { + "type": "float", + "default": 0, + "description": "The throttle for this request in sub-requests per second. 0 means set no throttle." } } }, diff --git a/test/framework/src/main/java/org/elasticsearch/test/ESTestCase.java b/test/framework/src/main/java/org/elasticsearch/test/ESTestCase.java index f21fc88ce23..14144132765 100644 --- a/test/framework/src/main/java/org/elasticsearch/test/ESTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/test/ESTestCase.java @@ -691,10 +691,18 @@ public abstract class ESTestCase extends LuceneTestCase { */ @SafeVarargs public static AnalysisService createAnalysisService(Index index, Settings settings, Consumer... moduleConsumers) throws IOException { + Settings nodeSettings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir()).build(); + return createAnalysisService(index, nodeSettings, settings, moduleConsumers); + } + + /** + * Creates an AnalysisService to test analysis factories and analyzers. + */ + @SafeVarargs + public static AnalysisService createAnalysisService(Index index, Settings nodeSettings, Settings settings, Consumer... moduleConsumers) throws IOException { Settings indexSettings = settingsBuilder().put(settings) .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT) .build(); - Settings nodeSettings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir()).build(); Environment env = new Environment(nodeSettings); AnalysisModule analysisModule = new AnalysisModule(env); for (Consumer consumer : moduleConsumers) {