Merge branch 'master' into integ_randomization

Author: Robert Muir
Date: 2015-08-06 14:51:00 -04:00
Commit: d1a5068b77
151 changed files with 5587 additions and 2599 deletions

.gitignore (vendored): 2 changes

@ -14,7 +14,7 @@ docs/html/
docs/build.log
/tmp/
backwards/
html_docs
## eclipse ignores (use 'mvn eclipse:eclipse' to build eclipse projects)
## All files (.project, .classpath, .settings/*) should be generated through Maven which
## will correctly set the classpath based on the declared dependencies and write settings


@ -33,6 +33,7 @@ import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.InPlaceMergeSorter;
import org.apache.lucene.util.ToStringUtils;
import java.io.IOException;
import java.util.Arrays;
@ -62,13 +63,17 @@ import java.util.List;
public abstract class BlendedTermQuery extends Query {
private final Term[] terms;
private final float[] boosts;
public BlendedTermQuery(Term[] terms) {
public BlendedTermQuery(Term[] terms, float[] boosts) {
if (terms == null || terms.length == 0) {
throw new IllegalArgumentException("terms must not be null or empty");
}
if (boosts != null && boosts.length != terms.length) {
throw new IllegalArgumentException("boosts must have the same size as terms");
}
this.terms = terms;
this.boosts = boosts;
}
@Override
@ -231,8 +236,22 @@ public abstract class BlendedTermQuery extends Query {
@Override
public String toString(String field) {
return "blended(terms: " + Arrays.toString(terms) + ")";
StringBuilder builder = new StringBuilder("blended(terms:[");
for (int i = 0; i < terms.length; ++i) {
builder.append(terms[i]);
float boost = 1f;
if (boosts != null) {
boost = boosts[i];
}
builder.append(ToStringUtils.boost(boost));
builder.append(", ");
}
if (terms.length > 0) {
builder.setLength(builder.length() - 2);
}
builder.append("])");
builder.append(ToStringUtils.boost(getBoost()));
return builder.toString();
}
private volatile Term[] equalTerms = null;
@ -277,7 +296,7 @@ public abstract class BlendedTermQuery extends Query {
}
public static BlendedTermQuery booleanBlendedQuery(Term[] terms, final float[] boosts, final boolean disableCoord) {
return new BlendedTermQuery(terms) {
return new BlendedTermQuery(terms, boosts) {
@Override
protected Query topLevelQuery(Term[] terms, TermContext[] ctx, int[] docFreqs, int maxDoc) {
BooleanQuery query = new BooleanQuery(disableCoord);
@ -294,7 +313,7 @@ public abstract class BlendedTermQuery extends Query {
}
public static BlendedTermQuery commonTermsBlendedQuery(Term[] terms, final float[] boosts, final boolean disableCoord, final float maxTermFrequency) {
return new BlendedTermQuery(terms) {
return new BlendedTermQuery(terms, boosts) {
@Override
protected Query topLevelQuery(Term[] terms, TermContext[] ctx, int[] docFreqs, int maxDoc) {
BooleanQuery query = new BooleanQuery(true);
@ -334,7 +353,7 @@ public abstract class BlendedTermQuery extends Query {
}
public static BlendedTermQuery dismaxBlendedQuery(Term[] terms, final float[] boosts, final float tieBreakerMultiplier) {
return new BlendedTermQuery(terms) {
return new BlendedTermQuery(terms, boosts) {
@Override
protected Query topLevelQuery(Term[] terms, TermContext[] ctx, int[] docFreqs, int maxDoc) {
DisjunctionMaxQuery query = new DisjunctionMaxQuery(tieBreakerMultiplier);
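To illustrate the new boosts-aware API, here is a minimal usage sketch built only from the factory and toString() changes in this hunk; the import paths and surrounding plumbing are assumed rather than taken from the commit.

// Hedged illustration only: imports assumed, not part of this commit's diff.
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Query;

Term[] terms = { new Term("name.first", "banon"), new Term("name.last", "banon") };
float[] boosts = { 2f, 3f };   // may be null; null means every term keeps boost 1.0
Query blended = BlendedTermQuery.dismaxBlendedQuery(terms, boosts, 0f);
// toString(field) now reports each term with its boost, roughly:
//   blended(terms:[name.first:banon^2.0, name.last:banon^3.0])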


@ -272,13 +272,13 @@ public class TransportClusterHealthAction extends TransportMasterNodeReadAction<
} catch (IndexNotFoundException e) {
// one of the specified indices is not there - treat it as RED.
ClusterHealthResponse response = new ClusterHealthResponse(clusterName.value(), Strings.EMPTY_ARRAY, clusterState,
numberOfPendingTasks, numberOfInFlightFetch, UnassignedInfo.getNumberOfDelayedUnassigned(settings, clusterState),
numberOfPendingTasks, numberOfInFlightFetch, UnassignedInfo.getNumberOfDelayedUnassigned(System.currentTimeMillis(), settings, clusterState),
pendingTaskTimeInQueue);
response.status = ClusterHealthStatus.RED;
return response;
}
return new ClusterHealthResponse(clusterName.value(), concreteIndices, clusterState, numberOfPendingTasks,
numberOfInFlightFetch, UnassignedInfo.getNumberOfDelayedUnassigned(settings, clusterState), pendingTaskTimeInQueue);
numberOfInFlightFetch, UnassignedInfo.getNumberOfDelayedUnassigned(System.currentTimeMillis(), settings, clusterState), pendingTaskTimeInQueue);
}
}


@ -59,7 +59,7 @@ final class JVMCheck {
/** Returns an error message to the user for a broken version */
String getErrorMessage() {
StringBuilder sb = new StringBuilder();
sb.append("Java version: ").append(Constants.JAVA_VERSION);
sb.append("Java version: ").append(fullVersion());
sb.append(" suffers from critical bug ").append(bugUrl);
sb.append(" which can cause data corruption.");
sb.append(System.lineSeparator());
@ -111,7 +111,7 @@ final class JVMCheck {
*/
static void check() {
if (Boolean.parseBoolean(System.getProperty(JVM_BYPASS))) {
Loggers.getLogger(JVMCheck.class).warn("bypassing jvm version check for version [{}], this can result in data corruption!", Constants.JAVA_VERSION);
Loggers.getLogger(JVMCheck.class).warn("bypassing jvm version check for version [{}], this can result in data corruption!", fullVersion());
} else if ("Oracle Corporation".equals(Constants.JVM_VENDOR)) {
HotspotBug bug = JVM_BROKEN_HOTSPOT_VERSIONS.get(Constants.JVM_VERSION);
if (bug != null) {
@ -135,11 +135,28 @@ final class JVMCheck {
StringBuilder sb = new StringBuilder();
sb.append("IBM J9 runtimes < 2.8 suffer from several bugs which can cause data corruption.");
sb.append(System.lineSeparator());
sb.append("Your version: " + Constants.JVM_VERSION);
sb.append("Your version: " + fullVersion());
sb.append(System.lineSeparator());
sb.append("Please upgrade the JVM to a recent IBM JDK");
throw new RuntimeException(sb.toString());
}
}
}
/**
* Returns java + jvm version, looks like this:
* {@code Oracle Corporation 1.8.0_45 [Java HotSpot(TM) 64-Bit Server VM 25.45-b02]}
*/
static String fullVersion() {
StringBuilder sb = new StringBuilder();
sb.append(Constants.JAVA_VENDOR);
sb.append(" ");
sb.append(Constants.JAVA_VERSION);
sb.append(" [");
sb.append(Constants.JVM_NAME);
sb.append(" ");
sb.append(Constants.JVM_VERSION);
sb.append("]");
return sb.toString();
}
}


@ -57,6 +57,7 @@ public class RoutingService extends AbstractLifecycleComponent<RoutingService> i
private AtomicBoolean rerouting = new AtomicBoolean();
private volatile long registeredNextDelaySetting = Long.MAX_VALUE;
private volatile ScheduledFuture registeredNextDelayFuture;
private volatile long unassignedShardsAllocatedTimestamp = 0;
@Inject
public RoutingService(Settings settings, ThreadPool threadPool, ClusterService clusterService, AllocationService allocationService) {
@ -87,6 +88,19 @@ public class RoutingService extends AbstractLifecycleComponent<RoutingService> i
return this.allocationService;
}
/**
* Update the last time the allocator tried to assign unassigned shards
*
* This is used so that both the GatewayAllocator and RoutingService use a
* consistent timestamp for comparing which shards have been delayed to
* avoid a race condition where GatewayAllocator thinks the shard should
* be delayed and the RoutingService thinks it has already passed the delay
* and that the GatewayAllocator has/will handle it.
*/
public void setUnassignedShardsAllocatedTimestamp(long timeInMillis) {
this.unassignedShardsAllocatedTimestamp = timeInMillis;
}
/**
* Initiates a reroute.
*/
@ -108,20 +122,29 @@ public class RoutingService extends AbstractLifecycleComponent<RoutingService> i
if (nextDelaySetting > 0 && nextDelaySetting < registeredNextDelaySetting) {
FutureUtils.cancel(registeredNextDelayFuture);
registeredNextDelaySetting = nextDelaySetting;
TimeValue nextDelay = TimeValue.timeValueMillis(UnassignedInfo.findNextDelayedAllocationIn(settings, event.state()));
logger.info("delaying allocation for [{}] unassigned shards, next check in [{}]", UnassignedInfo.getNumberOfDelayedUnassigned(settings, event.state()), nextDelay);
registeredNextDelayFuture = threadPool.schedule(nextDelay, ThreadPool.Names.SAME, new AbstractRunnable() {
@Override
protected void doRun() throws Exception {
registeredNextDelaySetting = Long.MAX_VALUE;
reroute("assign delayed unassigned shards");
}
// We use System.currentTimeMillis here because we want the
// next delay from the "now" perspective, rather than the
// delay from the last time the GatewayAllocator tried to
// assign/delay the shard
TimeValue nextDelay = TimeValue.timeValueMillis(UnassignedInfo.findNextDelayedAllocationIn(System.currentTimeMillis(), settings, event.state()));
int unassignedDelayedShards = UnassignedInfo.getNumberOfDelayedUnassigned(unassignedShardsAllocatedTimestamp, settings, event.state());
if (unassignedDelayedShards > 0) {
logger.info("delaying allocation for [{}] unassigned shards, next check in [{}]",
unassignedDelayedShards, nextDelay);
registeredNextDelayFuture = threadPool.schedule(nextDelay, ThreadPool.Names.SAME, new AbstractRunnable() {
@Override
protected void doRun() throws Exception {
registeredNextDelaySetting = Long.MAX_VALUE;
reroute("assign delayed unassigned shards");
}
@Override
public void onFailure(Throwable t) {
logger.warn("failed to schedule/execute reroute post unassigned shard", t);
}
});
@Override
public void onFailure(Throwable t) {
logger.warn("failed to schedule/execute reroute post unassigned shard", t);
registeredNextDelaySetting = Long.MAX_VALUE;
}
});
}
} else {
logger.trace("no need to schedule reroute due to delayed unassigned, next_delay_setting [{}], registered [{}]", nextDelaySetting, registeredNextDelaySetting);
}
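A compressed sketch of the handshake described in the comments above, using only the method signatures visible in this commit; the call sites are simplified, not copied verbatim.

// GatewayAllocator side: snapshot "now" once and hand it to the RoutingService.
long lastAllocateUnassignedRun = System.currentTimeMillis();
routingService.setUnassignedShardsAllocatedTimestamp(lastAllocateUnassignedRun);

// RoutingService side: counting delayed shards uses that shared snapshot, so both
// components agree on which shards are still inside their delay window ...
int delayed = UnassignedInfo.getNumberOfDelayedUnassigned(unassignedShardsAllocatedTimestamp, settings, event.state());
// ... while the next wake-up is still computed from the current wall clock.
TimeValue nextDelay = TimeValue.timeValueMillis(
        UnassignedInfo.findNextDelayedAllocationIn(System.currentTimeMillis(), settings, event.state()));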


@ -199,12 +199,12 @@ public class UnassignedInfo implements ToXContent, Writeable<UnassignedInfo> {
/**
* The time in millisecond until this unassigned shard can be reassigned.
*/
public long getDelayAllocationExpirationIn(Settings settings, Settings indexSettings) {
public long getDelayAllocationExpirationIn(long unassignedShardsAllocatedTimestamp, Settings settings, Settings indexSettings) {
long delayTimeout = getAllocationDelayTimeoutSetting(settings, indexSettings);
if (delayTimeout == 0) {
return 0;
}
long delta = System.currentTimeMillis() - timestamp;
long delta = unassignedShardsAllocatedTimestamp - timestamp;
// account for time drift, treat it as no timeout
if (delta < 0) {
return 0;
@ -216,12 +216,12 @@ public class UnassignedInfo implements ToXContent, Writeable<UnassignedInfo> {
/**
* Returns the number of shards that are unassigned and currently being delayed.
*/
public static int getNumberOfDelayedUnassigned(Settings settings, ClusterState state) {
public static int getNumberOfDelayedUnassigned(long unassignedShardsAllocatedTimestamp, Settings settings, ClusterState state) {
int count = 0;
for (ShardRouting shard : state.routingTable().shardsWithState(ShardRoutingState.UNASSIGNED)) {
if (shard.primary() == false) {
IndexMetaData indexMetaData = state.metaData().index(shard.getIndex());
long delay = shard.unassignedInfo().getDelayAllocationExpirationIn(settings, indexMetaData.getSettings());
long delay = shard.unassignedInfo().getDelayAllocationExpirationIn(unassignedShardsAllocatedTimestamp, settings, indexMetaData.getSettings());
if (delay > 0) {
count++;
}
@ -251,12 +251,12 @@ public class UnassignedInfo implements ToXContent, Writeable<UnassignedInfo> {
/**
* Finds the next (closest) delay expiration of an unassigned shard. Returns 0 if there are none.
*/
public static long findNextDelayedAllocationIn(Settings settings, ClusterState state) {
public static long findNextDelayedAllocationIn(long unassignedShardsAllocatedTimestamp, Settings settings, ClusterState state) {
long nextDelay = Long.MAX_VALUE;
for (ShardRouting shard : state.routingTable().shardsWithState(ShardRoutingState.UNASSIGNED)) {
if (shard.primary() == false) {
IndexMetaData indexMetaData = state.metaData().index(shard.getIndex());
long nextShardDelay = shard.unassignedInfo().getDelayAllocationExpirationIn(settings, indexMetaData.getSettings());
long nextShardDelay = shard.unassignedInfo().getDelayAllocationExpirationIn(unassignedShardsAllocatedTimestamp, settings, indexMetaData.getSettings());
if (nextShardDelay > 0 && nextShardDelay < nextDelay) {
nextDelay = nextShardDelay;
}
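A small worked example of the parameterized expiration check; the remaining-delay arithmetic after the drift guard falls outside this hunk, so the first outcome below is an assumption.

// Assume a shard went unassigned at timestamp = 1_000 with an effective
// index.unassigned.node_left.delayed_timeout of 400ms.
//   unassignedShardsAllocatedTimestamp = 1_300  ->  delta = 300
//     (roughly 100ms of delay presumably remains; the exact return value is not shown here)
//   unassignedShardsAllocatedTimestamp =   900  ->  delta = -100
//     negative delta is treated as clock drift and the method returns 0 (no remaining delay)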


@ -128,9 +128,7 @@ public class ScriptScoreFunction extends ScoreFunction {
@Override
public boolean needsScores() {
// Scripts might use _score so we return true here
// TODO: Make scripts able to tell us whether they use scores
return true;
return script.needsScores();
}
@Override


@ -113,6 +113,10 @@ public class GatewayAllocator extends AbstractComponent {
}
public boolean allocateUnassigned(final RoutingAllocation allocation) {
// Take a snapshot of the current time and tell the RoutingService
// about it, so it will use a consistent timestamp for delays
long lastAllocateUnassignedRun = System.currentTimeMillis();
this.routingService.setUnassignedShardsAllocatedTimestamp(lastAllocateUnassignedRun);
boolean changed = false;
RoutingNodes.UnassignedShards unassigned = allocation.routingNodes().unassigned();
@ -127,7 +131,7 @@ public class GatewayAllocator extends AbstractComponent {
changed |= primaryShardAllocator.allocateUnassigned(allocation);
changed |= replicaShardAllocator.processExistingRecoveries(allocation);
changed |= replicaShardAllocator.allocateUnassigned(allocation);
changed |= replicaShardAllocator.allocateUnassigned(allocation, lastAllocateUnassignedRun);
return changed;
}


@ -111,6 +111,10 @@ public abstract class ReplicaShardAllocator extends AbstractComponent {
}
public boolean allocateUnassigned(RoutingAllocation allocation) {
return allocateUnassigned(allocation, System.currentTimeMillis());
}
public boolean allocateUnassigned(RoutingAllocation allocation, long allocateUnassignedTimestamp) {
boolean changed = false;
final RoutingNodes routingNodes = allocation.routingNodes();
final RoutingNodes.UnassignedShards.UnassignedIterator unassignedIterator = routingNodes.unassigned().iterator();
@ -174,7 +178,7 @@ public abstract class ReplicaShardAllocator extends AbstractComponent {
// will anyhow wait to find an existing copy of the shard to be allocated
// note: the other side of the equation is scheduling a reroute in a timely manner, which happens in the RoutingService
IndexMetaData indexMetaData = allocation.metaData().index(shard.getIndex());
long delay = shard.unassignedInfo().getDelayAllocationExpirationIn(settings, indexMetaData.getSettings());
long delay = shard.unassignedInfo().getDelayAllocationExpirationIn(allocateUnassignedTimestamp, settings, indexMetaData.getSettings());
if (delay > 0) {
logger.debug("[{}][{}]: delaying allocation of [{}] for [{}]", shard.index(), shard.id(), shard, TimeValue.timeValueMillis(delay));
/**


@ -228,8 +228,6 @@ public final class ShardGetService extends AbstractIndexShardComponent {
if (source.ttl > 0) {
value = docMapper.TTLFieldMapper().valueForSearch(source.timestamp + source.ttl);
}
} else if (field.equals(SizeFieldMapper.NAME) && docMapper.rootMapper(SizeFieldMapper.class).fieldType().stored()) {
value = source.source.length();
} else {
if (searchLookup == null) {
searchLookup = new SearchLookup(mapperService, null, new String[]{type});


@ -48,7 +48,6 @@ import org.elasticsearch.index.mapper.internal.IdFieldMapper;
import org.elasticsearch.index.mapper.internal.IndexFieldMapper;
import org.elasticsearch.index.mapper.internal.ParentFieldMapper;
import org.elasticsearch.index.mapper.internal.RoutingFieldMapper;
import org.elasticsearch.index.mapper.internal.SizeFieldMapper;
import org.elasticsearch.index.mapper.internal.SourceFieldMapper;
import org.elasticsearch.index.mapper.internal.TTLFieldMapper;
import org.elasticsearch.index.mapper.internal.TimestampFieldMapper;
@ -106,7 +105,6 @@ public class DocumentMapper implements ToXContent {
this.rootMappers.put(IdFieldMapper.class, new IdFieldMapper(indexSettings, mapperService.fullName(IdFieldMapper.NAME)));
this.rootMappers.put(RoutingFieldMapper.class, new RoutingFieldMapper(indexSettings, mapperService.fullName(RoutingFieldMapper.NAME)));
// add default mappers, order is important (for example analyzer should come before the rest to set context.analyzer)
this.rootMappers.put(SizeFieldMapper.class, new SizeFieldMapper(indexSettings, mapperService.fullName(SizeFieldMapper.NAME)));
this.rootMappers.put(IndexFieldMapper.class, new IndexFieldMapper(indexSettings, mapperService.fullName(IndexFieldMapper.NAME)));
this.rootMappers.put(SourceFieldMapper.class, new SourceFieldMapper(indexSettings));
this.rootMappers.put(TypeFieldMapper.class, new TypeFieldMapper(indexSettings, mapperService.fullName(TypeFieldMapper.NAME)));
@ -283,10 +281,6 @@ public class DocumentMapper implements ToXContent {
return rootMapper(ParentFieldMapper.class);
}
public SizeFieldMapper sizeFieldMapper() {
return rootMapper(SizeFieldMapper.class);
}
public TimestampFieldMapper timestampFieldMapper() {
return rootMapper(TimestampFieldMapper.class);
}
@ -299,10 +293,6 @@ public class DocumentMapper implements ToXContent {
return rootMapper(IndexFieldMapper.class);
}
public SizeFieldMapper SizeFieldMapper() {
return rootMapper(SizeFieldMapper.class);
}
public Query typeFilter() {
return typeMapper().fieldType().termQuery(type, null);
}


@ -20,7 +20,9 @@
package org.elasticsearch.index.mapper;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSortedMap;
import com.google.common.collect.Maps;
import org.elasticsearch.Version;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.ParseFieldMatcher;
@ -35,8 +37,6 @@ import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentHelper;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.index.AbstractIndexComponent;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.analysis.AnalysisService;
import org.elasticsearch.index.mapper.core.*;
import org.elasticsearch.index.mapper.geo.GeoPointFieldMapper;
@ -50,6 +50,7 @@ import org.elasticsearch.index.similarity.SimilarityLookupService;
import org.elasticsearch.script.Script;
import org.elasticsearch.script.ScriptService;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
@ -73,6 +74,7 @@ public class DocumentMapperParser {
private volatile ImmutableMap<String, Mapper.TypeParser> typeParsers;
private volatile ImmutableMap<String, Mapper.TypeParser> rootTypeParsers;
private volatile ImmutableMap<String, Mapper.TypeParser> additionalRootMappers;
public DocumentMapperParser(@IndexSettings Settings indexSettings, MapperService mapperService, AnalysisService analysisService,
SimilarityLookupService similarityLookupService, ScriptService scriptService) {
@ -109,7 +111,6 @@ public class DocumentMapperParser {
typeParsers = typeParsersBuilder.immutableMap();
rootTypeParsers = new MapBuilder<String, Mapper.TypeParser>()
.put(SizeFieldMapper.NAME, new SizeFieldMapper.TypeParser())
.put(IndexFieldMapper.NAME, new IndexFieldMapper.TypeParser())
.put(SourceFieldMapper.NAME, new SourceFieldMapper.TypeParser())
.put(TypeFieldMapper.NAME, new TypeFieldMapper.TypeParser())
@ -123,6 +124,7 @@ public class DocumentMapperParser {
.put(IdFieldMapper.NAME, new IdFieldMapper.TypeParser())
.put(FieldNamesFieldMapper.NAME, new FieldNamesFieldMapper.TypeParser())
.immutableMap();
additionalRootMappers = ImmutableSortedMap.<String, Mapper.TypeParser>of();
indexVersionCreated = Version.indexCreated(indexSettings);
}
@ -139,6 +141,10 @@ public class DocumentMapperParser {
rootTypeParsers = new MapBuilder<>(rootTypeParsers)
.put(type, typeParser)
.immutableMap();
additionalRootMappers = ImmutableSortedMap.<String, Mapper.TypeParser>naturalOrder()
.putAll(additionalRootMappers)
.put(type, typeParser)
.build();
}
}
@ -204,6 +210,10 @@ public class DocumentMapperParser {
Mapper.TypeParser.ParserContext parserContext = parserContext();
// parse RootObjectMapper
DocumentMapper.Builder docBuilder = doc(indexSettings, (RootObjectMapper.Builder) rootObjectTypeParser.parse(type, mapping, parserContext), mapperService);
// Add default mapping for the plugged-in meta mappers
for (Map.Entry<String, Mapper.TypeParser> entry : additionalRootMappers.entrySet()) {
docBuilder.put((MetadataFieldMapper.Builder<?, ?>) entry.getValue().parse(entry.getKey(), Collections.<String, Object>emptyMap(), parserContext));
}
Iterator<Map.Entry<String, Object>> iterator = mapping.entrySet().iterator();
// parse DocumentMapper
while(iterator.hasNext()) {
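The practical effect is that metadata mappers such as `_size` no longer live in core (see the SizeFieldMapper removals elsewhere in this commit); a plugin registers a root type parser and the loop above then injects a default instance of that mapper into every parsed type. A hedged sketch follows; the registration method wrapping the additionalRootMappers update above is not named in this diff, so putRootTypeParser is an assumption.

// Assumed plugin-side registration; only the additionalRootMappers bookkeeping is shown in the diff.
documentMapperParser.putRootTypeParser(SizeFieldMapper.NAME, new SizeFieldMapper.TypeParser());
// Afterwards parse(type, mapping) adds a default _size mapper to the type even when the
// mapping source never mentions it, via the plugged-in meta mapper loop above.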


@ -182,7 +182,9 @@ public class QueryParseContext {
}
public void addNamedQuery(String name, Query query) {
namedQueries.put(name, query);
if (query != null) {
namedQueries.put(name, query);
}
}
public ImmutableMap<String, Query> copyNamedQueries() {


@ -85,7 +85,7 @@ public class MultiMatchQuery extends MatchQuery {
throw new IllegalStateException("No such type: " + type);
}
final List<? extends Query> queries = queryBuilder.buildGroupedQueries(type, fieldNames, value, minimumShouldMatch);
return queryBuilder.conbineGrouped(queries);
return queryBuilder.combineGrouped(queries);
}
private QueryBuilder queryBuilder;
@ -119,7 +119,7 @@ public class MultiMatchQuery extends MatchQuery {
return parseAndApply(type, field, value, minimumShouldMatch, boostValue);
}
public Query conbineGrouped(List<? extends Query> groupQuery) {
public Query combineGrouped(List<? extends Query> groupQuery) {
if (groupQuery == null || groupQuery.isEmpty()) {
return null;
}
@ -196,7 +196,7 @@ public class MultiMatchQuery extends MatchQuery {
blendedFields = null;
}
final FieldAndFieldType fieldAndFieldType = group.get(0);
Query q = parseGroup(type.matchQueryType(), fieldAndFieldType.field, fieldAndFieldType.boost, value, minimumShouldMatch);
Query q = parseGroup(type.matchQueryType(), fieldAndFieldType.field, 1f, value, minimumShouldMatch);
if (q != null) {
queries.add(q);
}


@ -86,6 +86,10 @@ public class NativeScriptEngineService extends AbstractComponent implements Scri
script.setLookup(lookup.getLeafSearchLookup(context));
return script;
}
@Override
public boolean needsScores() {
return scriptFactory.needsScores();
}
};
}


@ -41,4 +41,11 @@ public interface NativeScriptFactory {
* @param params The parameters passed to the script. Can be <tt>null</tt>.
*/
ExecutableScript newScript(@Nullable Map<String, Object> params);
/**
* Indicates if document scores may be needed by the produced scripts.
*
* @return {@code true} if scores are needed.
*/
boolean needsScores();
}
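A minimal sketch of a script factory implementing the new method; the class and script names are hypothetical, and the pattern simply mirrors the test factories updated later in this commit.

import java.util.Map;

import org.elasticsearch.common.Nullable;
import org.elasticsearch.script.ExecutableScript;
import org.elasticsearch.script.NativeScriptFactory;

public class MyConstantScriptFactory implements NativeScriptFactory {
    @Override
    public ExecutableScript newScript(@Nullable Map<String, Object> params) {
        return new MyConstantScript(); // hypothetical AbstractSearchScript subclass
    }

    @Override
    public boolean needsScores() {
        // this script never reads _score, so the engine may skip score computation
        return false;
    }
}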


@ -29,4 +29,11 @@ public interface SearchScript {
LeafSearchScript getLeafSearchScript(LeafReaderContext context) throws IOException;
/**
* Indicates if document scores may be needed by this {@link SearchScript}.
*
* @return {@code true} if scores are needed.
*/
boolean needsScores();
}


@ -112,7 +112,6 @@ public class ExpressionScriptEngineService extends AbstractComponent implements
for (String variable : expr.variables) {
if (variable.equals("_score")) {
bindings.add(new SortField("_score", SortField.Type.SCORE));
} else if (variable.equals("_value")) {
specialValue = new ReplaceableConstValueSource();
bindings.add("_value", specialValue);
@ -173,7 +172,8 @@ public class ExpressionScriptEngineService extends AbstractComponent implements
}
}
return new ExpressionSearchScript(compiledScript, bindings, specialValue);
final boolean needsScores = expr.getSortField(bindings, false).needsScores();
return new ExpressionSearchScript(compiledScript, bindings, specialValue, needsScores);
} catch (Exception exception) {
throw new ScriptException("Error during search with " + compiledScript, exception);
}
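The detection relies on Lucene's expressions module: an Expression's SortField reports whether any bound variable requires scores, which is exactly the expr.getSortField(bindings, false).needsScores() call above. A hedged standalone sketch of that mechanism, independent of the engine wiring:

import org.apache.lucene.expressions.Expression;
import org.apache.lucene.expressions.SimpleBindings;
import org.apache.lucene.expressions.js.JavascriptCompiler;
import org.apache.lucene.search.SortField;

SimpleBindings bindings = new SimpleBindings();
bindings.add(new SortField("_score", SortField.Type.SCORE));

// JavascriptCompiler.compile declares ParseException; handling is omitted in this sketch.
Expression constant = JavascriptCompiler.compile("1.2");
Expression scored = JavascriptCompiler.compile("1/_score");

System.out.println(constant.getSortField(bindings, false).needsScores()); // false
System.out.println(scored.getSortField(bindings, false).needsScores());   // true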


@ -46,14 +46,21 @@ class ExpressionSearchScript implements SearchScript {
final SimpleBindings bindings;
final ValueSource source;
final ReplaceableConstValueSource specialValue; // _value
final boolean needsScores;
Scorer scorer;
int docid;
ExpressionSearchScript(CompiledScript c, SimpleBindings b, ReplaceableConstValueSource v) {
ExpressionSearchScript(CompiledScript c, SimpleBindings b, ReplaceableConstValueSource v, boolean needsScores) {
compiledScript = c;
bindings = b;
source = ((Expression)compiledScript.compiled()).getValueSource(bindings);
specialValue = v;
this.needsScores = needsScores;
}
@Override
public boolean needsScores() {
return needsScores;
}
@Override


@ -168,6 +168,12 @@ public class GroovyScriptEngineService extends AbstractComponent implements Scri
}
return new GroovyScript(compiledScript, scriptObject, leafLookup, logger);
}
@Override
public boolean needsScores() {
// TODO: can we reliably know if a groovy script makes use of _score
return true;
}
};
}


@ -216,8 +216,7 @@ public abstract class ValuesSource {
@Override
public boolean needsScores() {
// TODO: add a way to know whether scripts are using scores
return true;
return script.needsScores();
}
}
@ -295,8 +294,7 @@ public abstract class ValuesSource {
@Override
public boolean needsScores() {
// TODO: add a way to know whether scripts are using scores
return true;
return script.needsScores();
}
@Override
@ -431,8 +429,7 @@ public abstract class ValuesSource {
@Override
public boolean needsScores() {
// TODO: add a way to know whether scripts are using scores
return true;
return script.needsScores();
}
}
@ -451,8 +448,7 @@ public abstract class ValuesSource {
@Override
public boolean needsScores() {
// TODO: add a way to know whether scripts are using scores
return true;
return script.needsScores();
}
@Override


@ -34,6 +34,11 @@ public class NativeScript1 extends AbstractSearchScript {
public ExecutableScript newScript(@Nullable Map<String, Object> params) {
return new NativeScript1();
}
@Override
public boolean needsScores() {
return false;
}
}
public static final String NATIVE_SCRIPT_1 = "native_1";


@ -34,6 +34,11 @@ public class NativeScript2 extends AbstractSearchScript {
public ExecutableScript newScript(@Nullable Map<String, Object> params) {
return new NativeScript2();
}
@Override
public boolean needsScores() {
return false;
}
}
public static final String NATIVE_SCRIPT_2 = "native_2";


@ -34,6 +34,11 @@ public class NativeScript3 extends AbstractSearchScript {
public ExecutableScript newScript(@Nullable Map<String, Object> params) {
return new NativeScript3();
}
@Override
public boolean needsScores() {
return false;
}
}
public static final String NATIVE_SCRIPT_3 = "native_3";


@ -34,6 +34,11 @@ public class NativeScript4 extends AbstractSearchScript {
public ExecutableScript newScript(@Nullable Map<String, Object> params) {
return new NativeScript4();
}
@Override
public boolean needsScores() {
return false;
}
}
public static final String NATIVE_SCRIPT_4 = "native_4";


@ -36,6 +36,11 @@ public class NativeConstantForLoopScoreScript extends AbstractSearchScript {
public ExecutableScript newScript(@Nullable Map<String, Object> params) {
return new NativeConstantForLoopScoreScript(params);
}
@Override
public boolean needsScores() {
return false;
}
}
private NativeConstantForLoopScoreScript(Map<String, Object> params) {


@ -36,6 +36,11 @@ public class NativeConstantScoreScript extends AbstractSearchScript {
public ExecutableScript newScript(@Nullable Map<String, Object> params) {
return new NativeConstantScoreScript();
}
@Override
public boolean needsScores() {
return false;
}
}
private NativeConstantScoreScript() {


@ -42,6 +42,11 @@ public class NativeNaiveTFIDFScoreScript extends AbstractSearchScript {
public ExecutableScript newScript(@Nullable Map<String, Object> params) {
return new NativeNaiveTFIDFScoreScript(params);
}
@Override
public boolean needsScores() {
return false;
}
}
private NativeNaiveTFIDFScoreScript(Map<String, Object> params) {


@ -44,6 +44,11 @@ public class NativePayloadSumNoRecordScoreScript extends AbstractSearchScript {
public ExecutableScript newScript(@Nullable Map<String, Object> params) {
return new NativePayloadSumNoRecordScoreScript(params);
}
@Override
public boolean needsScores() {
return false;
}
}
private NativePayloadSumNoRecordScoreScript(Map<String, Object> params) {


@ -44,6 +44,11 @@ public class NativePayloadSumScoreScript extends AbstractSearchScript {
public ExecutableScript newScript(@Nullable Map<String, Object> params) {
return new NativePayloadSumScoreScript(params);
}
@Override
public boolean needsScores() {
return false;
}
}
private NativePayloadSumScoreScript(Map<String, Object> params) {


@ -34,15 +34,18 @@ import org.elasticsearch.cluster.routing.allocation.command.MoveAllocationComman
import org.elasticsearch.cluster.routing.allocation.decider.Decision;
import org.elasticsearch.cluster.routing.allocation.decider.DisableAllocationDecider;
import org.elasticsearch.cluster.routing.allocation.decider.EnableAllocationDecider.Allocation;
import org.elasticsearch.cluster.routing.allocation.decider.ThrottlingAllocationDecider;
import org.elasticsearch.common.Priority;
import org.elasticsearch.common.io.FileSystemUtils;
import org.elasticsearch.common.logging.ESLogger;
import org.elasticsearch.common.logging.Loggers;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.env.NodeEnvironment;
import org.elasticsearch.index.shard.ShardId;
import org.elasticsearch.test.ESIntegTestCase;
import org.elasticsearch.test.ESIntegTestCase.ClusterScope;
import org.elasticsearch.test.InternalTestCluster;
import org.junit.Test;
import java.nio.file.Path;
@ -160,6 +163,40 @@ public class ClusterRerouteIT extends ESIntegTestCase {
rerouteWithAllocateLocalGateway(commonSettings);
}
@Test
public void testDelayWithALargeAmountOfShards() throws Exception {
Settings commonSettings = settingsBuilder()
.put("gateway.type", "local")
.put(ThrottlingAllocationDecider.CLUSTER_ROUTING_ALLOCATION_CONCURRENT_RECOVERIES, 1)
.build();
logger.info("--> starting 4 nodes");
String node_1 = internalCluster().startNode(commonSettings);
internalCluster().startNode(commonSettings);
internalCluster().startNode(commonSettings);
internalCluster().startNode(commonSettings);
assertThat(cluster().size(), equalTo(4));
ClusterHealthResponse healthResponse = client().admin().cluster().prepareHealth().setWaitForNodes("4").execute().actionGet();
assertThat(healthResponse.isTimedOut(), equalTo(false));
logger.info("--> create indices");
for (int i = 0; i < 25; i++) {
client().admin().indices().prepareCreate("test" + i)
.setSettings(settingsBuilder()
.put("index.number_of_shards", 5).put("index.number_of_replicas", 1)
.put("index.unassigned.node_left.delayed_timeout", randomIntBetween(250, 1000) + "ms"))
.execute().actionGet();
}
ensureGreen(TimeValue.timeValueMinutes(1));
logger.info("--> stopping node1");
internalCluster().stopRandomNode(InternalTestCluster.nameFilter(node_1));
// This might run slowly on older hardware
ensureGreen(TimeValue.timeValueMinutes(2));
}
private void rerouteWithAllocateLocalGateway(Settings commonSettings) throws Exception {
logger.info("--> starting 2 nodes");
String node_1 = internalCluster().startNode(commonSettings);


@ -28,6 +28,7 @@ import org.elasticsearch.cluster.metadata.MetaData;
import org.elasticsearch.cluster.node.DiscoveryNodes;
import org.elasticsearch.cluster.routing.allocation.AllocationService;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.test.ESAllocationTestCase;
import org.elasticsearch.threadpool.ThreadPool;
import org.junit.After;
@ -112,6 +113,10 @@ public class RoutingServiceTests extends ESAllocationTestCase {
ClusterState prevState = clusterState;
clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder(clusterState.nodes()).remove("node2")).build();
clusterState = ClusterState.builder(clusterState).routingResult(allocation.reroute(clusterState)).build();
// We need to update the routing service's last attempted run to
// signal that the GatewayAllocator tried to allocate it but
// it was delayed
routingService.setUnassignedShardsAllocatedTimestamp(System.currentTimeMillis());
ClusterState newState = clusterState;
routingService.clusterChanged(new ClusterChangedEvent("test", newState, prevState));
@ -125,6 +130,44 @@ public class RoutingServiceTests extends ESAllocationTestCase {
assertThat(routingService.getRegisteredNextDelaySetting(), equalTo(Long.MAX_VALUE));
}
@Test
public void testDelayedUnassignedDoesNotRerouteForNegativeDelays() throws Exception {
AllocationService allocation = createAllocationService();
MetaData metaData = MetaData.builder()
.put(IndexMetaData.builder("test").settings(settings(Version.CURRENT).put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING, "100ms"))
.numberOfShards(1).numberOfReplicas(1))
.build();
ClusterState clusterState = ClusterState.builder(ClusterName.DEFAULT)
.metaData(metaData)
.routingTable(RoutingTable.builder().addAsNew(metaData.index("test"))).build();
clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder().put(newNode("node1")).put(newNode("node2")).localNodeId("node1").masterNodeId("node1")).build();
clusterState = ClusterState.builder(clusterState).routingResult(allocation.reroute(clusterState)).build();
// starting primaries
clusterState = ClusterState.builder(clusterState).routingResult(allocation.applyStartedShards(clusterState, clusterState.routingNodes().shardsWithState(INITIALIZING))).build();
// starting replicas
clusterState = ClusterState.builder(clusterState).routingResult(allocation.applyStartedShards(clusterState, clusterState.routingNodes().shardsWithState(INITIALIZING))).build();
assertThat(clusterState.routingNodes().hasUnassigned(), equalTo(false));
// remove node2 and reroute
ClusterState prevState = clusterState;
clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder(clusterState.nodes()).remove("node2")).build();
clusterState = ClusterState.builder(clusterState).routingResult(allocation.reroute(clusterState)).build();
// Set it in the future so the delay will be negative
routingService.setUnassignedShardsAllocatedTimestamp(System.currentTimeMillis() + TimeValue.timeValueMinutes(1).millis());
ClusterState newState = clusterState;
routingService.clusterChanged(new ClusterChangedEvent("test", newState, prevState));
assertBusy(new Runnable() {
@Override
public void run() {
assertThat(routingService.hasReroutedAndClear(), equalTo(false));
// verify the registration has been updated
assertThat(routingService.getRegisteredNextDelaySetting(), equalTo(100L));
}
});
}
private class TestRoutingService extends RoutingService {
private AtomicBoolean rerouted = new AtomicBoolean();


@ -273,7 +273,8 @@ public class UnassignedInfoTests extends ESAllocationTestCase {
assertBusy(new Runnable() {
@Override
public void run() {
long delay = unassignedInfo.getDelayAllocationExpirationIn(Settings.builder().put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING, "10h").build(), Settings.EMPTY);
long delay = unassignedInfo.getDelayAllocationExpirationIn(System.currentTimeMillis(),
Settings.builder().put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING, "10h").build(), Settings.EMPTY);
assertThat(delay, greaterThan(0l));
assertThat(delay, lessThan(TimeValue.timeValueHours(10).millis()));
}
@ -290,7 +291,8 @@ public class UnassignedInfoTests extends ESAllocationTestCase {
UnassignedInfo unassignedInfo = new UnassignedInfo(RandomPicks.randomFrom(getRandom(), reasons), null);
long delay = unassignedInfo.getAllocationDelayTimeoutSetting(Settings.builder().put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING, "10h").build(), Settings.EMPTY);
assertThat(delay, equalTo(0l));
delay = unassignedInfo.getDelayAllocationExpirationIn(Settings.builder().put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING, "10h").build(), Settings.EMPTY);
delay = unassignedInfo.getDelayAllocationExpirationIn(System.currentTimeMillis(),
Settings.builder().put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING, "10h").build(), Settings.EMPTY);
assertThat(delay, equalTo(0l));
}
@ -306,7 +308,8 @@ public class UnassignedInfoTests extends ESAllocationTestCase {
.routingTable(RoutingTable.builder().addAsNew(metaData.index("test1")).addAsNew(metaData.index("test2"))).build();
clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder().put(newNode("node1")).put(newNode("node2"))).build();
clusterState = ClusterState.builder(clusterState).routingResult(allocation.reroute(clusterState)).build();
assertThat(UnassignedInfo.getNumberOfDelayedUnassigned(Settings.builder().put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING, "10h").build(), clusterState), equalTo(0));
assertThat(UnassignedInfo.getNumberOfDelayedUnassigned(System.currentTimeMillis(),
Settings.builder().put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING, "10h").build(), clusterState), equalTo(0));
// starting primaries
clusterState = ClusterState.builder(clusterState).routingResult(allocation.applyStartedShards(clusterState, clusterState.routingNodes().shardsWithState(INITIALIZING))).build();
// starting replicas
@ -315,7 +318,8 @@ public class UnassignedInfoTests extends ESAllocationTestCase {
// remove node2 and reroute
clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder(clusterState.nodes()).remove("node2")).build();
clusterState = ClusterState.builder(clusterState).routingResult(allocation.reroute(clusterState)).build();
assertThat(clusterState.prettyPrint(), UnassignedInfo.getNumberOfDelayedUnassigned(Settings.builder().put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING, "10h").build(), clusterState), equalTo(2));
assertThat(clusterState.prettyPrint(), UnassignedInfo.getNumberOfDelayedUnassigned(System.currentTimeMillis(),
Settings.builder().put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING, "10h").build(), clusterState), equalTo(2));
}
@Test
@ -330,7 +334,8 @@ public class UnassignedInfoTests extends ESAllocationTestCase {
.routingTable(RoutingTable.builder().addAsNew(metaData.index("test1")).addAsNew(metaData.index("test2"))).build();
clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder().put(newNode("node1")).put(newNode("node2"))).build();
clusterState = ClusterState.builder(clusterState).routingResult(allocation.reroute(clusterState)).build();
assertThat(UnassignedInfo.getNumberOfDelayedUnassigned(Settings.builder().put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING, "10h").build(), clusterState), equalTo(0));
assertThat(UnassignedInfo.getNumberOfDelayedUnassigned(System.currentTimeMillis(),
Settings.builder().put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING, "10h").build(), clusterState), equalTo(0));
// starting primaries
clusterState = ClusterState.builder(clusterState).routingResult(allocation.applyStartedShards(clusterState, clusterState.routingNodes().shardsWithState(INITIALIZING))).build();
// starting replicas
@ -343,7 +348,8 @@ public class UnassignedInfoTests extends ESAllocationTestCase {
long nextDelaySetting = UnassignedInfo.findSmallestDelayedAllocationSetting(Settings.builder().put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING, "10h").build(), clusterState);
assertThat(nextDelaySetting, equalTo(TimeValue.timeValueHours(10).millis()));
long nextDelay = UnassignedInfo.findNextDelayedAllocationIn(Settings.builder().put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING, "10h").build(), clusterState);
long nextDelay = UnassignedInfo.findNextDelayedAllocationIn(System.currentTimeMillis(),
Settings.builder().put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING, "10h").build(), clusterState);
assertThat(nextDelay, greaterThan(TimeValue.timeValueHours(9).millis()));
assertThat(nextDelay, lessThanOrEqualTo(TimeValue.timeValueHours(10).millis()));
}


@ -73,5 +73,10 @@ public class ScriptScoreFunctionTests extends ESTestCase {
}
};
}
@Override
public boolean needsScores() {
return false;
}
}
}


@ -1031,9 +1031,6 @@ public class GetActionIT extends ESIntegTestCase {
" \"doc\": {\n" +
" \"_timestamp\": {\n" +
" \"enabled\": true\n" +
" },\n" +
" \"_size\": {\n" +
" \"enabled\": true\n" +
" }\n" +
" }\n" +
" }\n" +
@ -1045,7 +1042,7 @@ public class GetActionIT extends ESIntegTestCase {
" \"text\": \"some text.\"\n" +
"}\n";
client().prepareIndex("test", "doc").setId("1").setSource(doc).setRouting("1").get();
String[] fieldsList = {"_timestamp", "_size", "_routing"};
String[] fieldsList = {"_timestamp", "_routing"};
// before refresh - document is only in translog
assertGetFieldsAlwaysWorks(indexOrAlias(), "doc", "1", fieldsList, "1");
refresh();


@ -44,8 +44,8 @@ import org.elasticsearch.index.mapper.MapperParsingException;
import org.elasticsearch.index.mapper.ParseContext.Document;
import org.elasticsearch.index.mapper.ParsedDocument;
import org.elasticsearch.index.mapper.internal.AllFieldMapper;
import org.elasticsearch.index.mapper.internal.SizeFieldMapper;
import org.elasticsearch.test.ESSingleNodeTestCase;
import org.elasticsearch.index.mapper.internal.TimestampFieldMapper;
import org.hamcrest.Matchers;
import org.junit.Test;
@ -387,7 +387,7 @@ public class SimpleAllMapperTests extends ESSingleNodeTestCase {
String mapping = "{";
Map<String, String> rootTypes = new HashMap<>();
//just pick some example from DocumentMapperParser.rootTypeParsers
rootTypes.put(SizeFieldMapper.NAME, "{\"enabled\" : true}");
rootTypes.put(TimestampFieldMapper.NAME, "{\"enabled\" : true}");
rootTypes.put("include_in_all", "true");
rootTypes.put("dynamic_date_formats", "[\"yyyy-MM-dd\", \"dd-MM-yyyy\"]");
rootTypes.put("numeric_detection", "true");


@ -33,16 +33,16 @@ public class ParseMappingTypeLevelTests extends ESSingleNodeTestCase {
@Test
public void testTypeLevel() throws Exception {
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("_size").field("enabled", true).endObject()
.startObject("_timestamp").field("enabled", true).endObject()
.endObject().endObject().string();
DocumentMapperParser parser = createIndex("test").mapperService().documentMapperParser();
DocumentMapper mapper = parser.parse("type", mapping);
assertThat(mapper.type(), equalTo("type"));
assertThat(mapper.sizeFieldMapper().enabled(), equalTo(true));
assertThat(mapper.timestampFieldMapper().enabled(), equalTo(true));
mapper = parser.parse(mapping);
assertThat(mapper.type(), equalTo("type"));
assertThat(mapper.sizeFieldMapper().enabled(), equalTo(true));
assertThat(mapper.timestampFieldMapper().enabled(), equalTo(true));
}
}


@ -158,25 +158,6 @@ public class UpdateMappingTests extends ESSingleNodeTestCase {
assertTrue(documentMapper.timestampFieldMapper().fieldType().stored());
}
@Test
public void testSizeParsing() throws IOException {
IndexService indexService = createIndex("test", Settings.settingsBuilder().build());
XContentBuilder indexMapping = XContentFactory.jsonBuilder();
boolean enabled = randomBoolean();
indexMapping.startObject()
.startObject("type")
.startObject("_size")
.field("enabled", enabled)
.endObject()
.endObject()
.endObject();
DocumentMapper documentMapper = indexService.mapperService().parse("type", new CompressedXContent(indexMapping.string()), true);
assertThat(documentMapper.sizeFieldMapper().enabled(), equalTo(enabled));
assertTrue(documentMapper.sizeFieldMapper().fieldType().stored());
documentMapper = indexService.mapperService().parse("type", new CompressedXContent(documentMapper.mappingSource().string()), true);
assertThat(documentMapper.sizeFieldMapper().enabled(), equalTo(enabled));
}
@Test
public void testSizeTimestampIndexParsing() throws IOException {
IndexService indexService = createIndex("test", Settings.settingsBuilder().build());
@ -192,7 +173,7 @@ public class UpdateMappingTests extends ESSingleNodeTestCase {
createIndex("test1", Settings.settingsBuilder().build());
createIndex("test2", Settings.settingsBuilder().build());
XContentBuilder defaultMapping = XContentFactory.jsonBuilder().startObject()
.startObject(MapperService.DEFAULT_MAPPING).startObject("_size").field("enabled", true).endObject().endObject()
.startObject(MapperService.DEFAULT_MAPPING).startObject("_timestamp").field("enabled", true).endObject().endObject()
.endObject();
client().admin().indices().preparePutMapping().setType(MapperService.DEFAULT_MAPPING).setSource(defaultMapping).get();
XContentBuilder typeMapping = XContentFactory.jsonBuilder().startObject()
@ -204,7 +185,7 @@ public class UpdateMappingTests extends ESSingleNodeTestCase {
GetMappingsResponse response = client().admin().indices().prepareGetMappings("test2").get();
assertNotNull(response.getMappings().get("test2").get("type").getSourceAsMap().get("_all"));
assertFalse((Boolean) ((LinkedHashMap) response.getMappings().get("test2").get("type").getSourceAsMap().get("_all")).get("enabled"));
assertNotNull(response.getMappings().get("test2").get("type").getSourceAsMap().get("_size"));
assertTrue((Boolean)((LinkedHashMap)response.getMappings().get("test2").get("type").getSourceAsMap().get("_size")).get("enabled"));
assertNotNull(response.getMappings().get("test2").get("type").getSourceAsMap().get("_timestamp"));
assertTrue((Boolean)((LinkedHashMap)response.getMappings().get("test2").get("type").getSourceAsMap().get("_timestamp")).get("enabled"));
}
}


@ -1 +1 @@
{"type":{"_size":{"enabled":false},"_timestamp":{"enabled":false}}}
{"type":{"_timestamp":{"enabled":false}}}


@ -0,0 +1,52 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.query;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.test.ESSingleNodeTestCase;
import org.junit.Test;
import java.io.IOException;
public class CommonTermsQueryParserTest extends ESSingleNodeTestCase {
@Test
public void testWhenParsedQueryIsNullNoNullPointerExceptionIsThrown() throws IOException {
final String index = "test-index";
final String type = "test-type";
client()
.admin()
.indices()
.prepareCreate(index)
.addMapping(type, "name", "type=string,analyzer=stop")
.execute()
.actionGet();
ensureGreen();
CommonTermsQueryBuilder commonTermsQueryBuilder =
new CommonTermsQueryBuilder("name", "the").queryName("query-name");
// the named query parses to null; we are testing this does not cause a NullPointerException
SearchResponse response =
client().prepareSearch(index).setTypes(type).setQuery(commonTermsQueryBuilder).execute().actionGet();
assertNotNull(response);
assertEquals(response.getHits().hits().length, 0);
}
}


@ -54,6 +54,7 @@ import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentHelper;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.index.IndexService;
import org.elasticsearch.index.engine.Engine;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.mapper.ParsedDocument;
import org.elasticsearch.index.mapper.core.NumberFieldMapper;
@ -83,6 +84,7 @@ import static org.hamcrest.Matchers.*;
public class SimpleIndexQueryParserTests extends ESSingleNodeTestCase {
private IndexQueryParserService queryParser;
private IndexService indexService;
@Before
public void setup() throws IOException {
@ -99,6 +101,7 @@ public class SimpleIndexQueryParserTests extends ESSingleNodeTestCase {
assertNotNull(doc.dynamicMappingsUpdate());
client().admin().indices().preparePutMapping("test").setType("person").setSource(doc.dynamicMappingsUpdate().toString()).get();
this.indexService = indexService;
queryParser = indexService.queryParserService();
}
@ -2269,6 +2272,23 @@ public class SimpleIndexQueryParserTests extends ESSingleNodeTestCase {
assertThat(parsedQuery, instanceOf(BooleanQuery.class));
}
public void testCrossFieldMultiMatchQuery() throws IOException {
IndexQueryParserService queryParser = queryParser();
Query parsedQuery = queryParser.parse(multiMatchQuery("banon", "name.first^2", "name.last^3", "foobar").type(MultiMatchQueryBuilder.Type.CROSS_FIELDS)).query();
try (Engine.Searcher searcher = indexService.shardSafe(0).acquireSearcher("test")) {
Query rewrittenQuery = searcher.searcher().rewrite(parsedQuery);
BooleanQuery expected = new BooleanQuery();
expected.add(new TermQuery(new Term("foobar", "banon")), Occur.SHOULD);
TermQuery tq1 = new TermQuery(new Term("name.first", "banon"));
tq1.setBoost(2);
TermQuery tq2 = new TermQuery(new Term("name.last", "banon"));
tq2.setBoost(3);
expected.add(new DisjunctionMaxQuery(Arrays.<Query>asList(tq1, tq2), 0f), Occur.SHOULD);
assertEquals(expected, rewrittenQuery);
}
}
@Test
public void testSimpleQueryString() throws Exception {
IndexQueryParserService queryParser = queryParser();


@ -506,37 +506,6 @@ public class PercolatorIT extends ESIntegTestCase {
assertThat(percolate.getMatches(), emptyArray());
}
@Test
public void percolateWithSizeField() throws Exception {
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type1")
.startObject("_size").field("enabled", true).endObject()
.startObject("properties").startObject("field1").field("type", "string").endObject().endObject()
.endObject().endObject().string();
assertAcked(prepareCreate("test").addMapping("type1", mapping));
ensureGreen();
logger.info("--> register a query");
client().prepareIndex("test", PercolatorService.TYPE_NAME, "kuku")
.setSource(jsonBuilder().startObject()
.field("query", termQuery("field1", "value1"))
.endObject())
.setRefresh(true)
.execute().actionGet();
logger.info("--> percolate a document");
PercolateResponse percolate = client().preparePercolate().setIndices("test").setDocumentType("type1")
.setSource(jsonBuilder().startObject()
.startObject("doc")
.field("field1", "value1")
.endObject()
.endObject())
.execute().actionGet();
assertMatchCount(percolate, 1l);
assertThat(percolate.getMatches(), arrayWithSize(1));
assertThat(convertFromTextArray(percolate.getMatches(), "test"), arrayContaining("kuku"));
}
@Test
public void testPercolateStatistics() throws Exception {
client().admin().indices().prepareCreate("test").execute().actionGet();


@ -97,6 +97,11 @@ public class NativeScriptTests extends ESTestCase {
public ExecutableScript newScript(@Nullable Map<String, Object> params) {
return new MyScript();
}
@Override
public boolean needsScores() {
return false;
}
}
static class MyScript extends AbstractExecutableScript {


@ -81,6 +81,11 @@ public class ScriptFieldIT extends ESIntegTestCase {
public ExecutableScript newScript(@Nullable Map<String, Object> params) {
return new IntScript();
}
@Override
public boolean needsScores() {
return false;
}
}
static class IntScript extends AbstractSearchScript {
@ -95,6 +100,11 @@ public class ScriptFieldIT extends ESIntegTestCase {
public ExecutableScript newScript(@Nullable Map<String, Object> params) {
return new LongScript();
}
@Override
public boolean needsScores() {
return false;
}
}
static class LongScript extends AbstractSearchScript {
@ -109,6 +119,11 @@ public class ScriptFieldIT extends ESIntegTestCase {
public ExecutableScript newScript(@Nullable Map<String, Object> params) {
return new FloatScript();
}
@Override
public boolean needsScores() {
return false;
}
}
static class FloatScript extends AbstractSearchScript {
@ -123,6 +138,11 @@ public class ScriptFieldIT extends ESIntegTestCase {
public ExecutableScript newScript(@Nullable Map<String, Object> params) {
return new DoubleScript();
}
@Override
public boolean needsScores() {
return false;
}
}
static class DoubleScript extends AbstractSearchScript {


@ -0,0 +1,57 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.script.expression;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.IndexService;
import org.elasticsearch.script.CompiledScript;
import org.elasticsearch.script.ScriptService.ScriptType;
import org.elasticsearch.script.SearchScript;
import org.elasticsearch.search.lookup.SearchLookup;
import org.elasticsearch.test.ESSingleNodeTestCase;
import java.util.Collections;
public class ExpressionScriptTests extends ESSingleNodeTestCase {
public void testNeedsScores() {
IndexService index = createIndex("test", Settings.EMPTY, "type", "d", "type=double");
ExpressionScriptEngineService service = new ExpressionScriptEngineService(Settings.EMPTY);
SearchLookup lookup = new SearchLookup(index.mapperService(), index.fieldData(), null);
Object compiled = service.compile("1.2");
SearchScript ss = service.search(new CompiledScript(ScriptType.INLINE, "randomName", "expression", compiled), lookup, Collections.<String, Object>emptyMap());
assertFalse(ss.needsScores());
compiled = service.compile("doc['d'].value");
ss = service.search(new CompiledScript(ScriptType.INLINE, "randomName", "expression", compiled), lookup, Collections.<String, Object>emptyMap());
assertFalse(ss.needsScores());
compiled = service.compile("1/_score");
ss = service.search(new CompiledScript(ScriptType.INLINE, "randomName", "expression", compiled), lookup, Collections.<String, Object>emptyMap());
assertTrue(ss.needsScores());
compiled = service.compile("doc['d'].value * _score");
ss = service.search(new CompiledScript(ScriptType.INLINE, "randomName", "expression", compiled), lookup, Collections.<String, Object>emptyMap());
assertTrue(ss.needsScores());
}
}


@ -35,6 +35,11 @@ public class NativeSignificanceScoreScriptNoParams extends TestScript {
public ExecutableScript newScript(@Nullable Map<String, Object> params) {
return new NativeSignificanceScoreScriptNoParams();
}
@Override
public boolean needsScores() {
return false;
}
}
private NativeSignificanceScoreScriptNoParams() {


@ -36,6 +36,11 @@ public class NativeSignificanceScoreScriptWithParams extends TestScript {
public ExecutableScript newScript(@Nullable Map<String, Object> params) {
return new NativeSignificanceScoreScriptWithParams(params);
}
@Override
public boolean needsScores() {
return false;
}
}
private NativeSignificanceScoreScriptWithParams(Map<String, Object> params) {


@ -76,9 +76,8 @@ public class SearchFieldsIT extends ESIntegTestCase {
client().admin().cluster().prepareHealth().setWaitForEvents(Priority.LANGUID).setWaitForYellowStatus().execute().actionGet();
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type1")
// _timestamp and _size are randomly enabled via templates but we don't want it here to test stored fields behaviour
// _timestamp is randomly enabled via templates but we don't want it here to test stored fields behaviour
.startObject("_timestamp").field("enabled", false).endObject()
.startObject("_size").field("enabled", false).endObject()
.startObject("properties")
.startObject("field1").field("type", "string").field("store", "yes").endObject()
.startObject("field2").field("type", "string").field("store", "no").endObject()


@ -102,6 +102,10 @@ public class ExplainableScriptIT extends ESIntegTestCase {
public ExecutableScript newScript(@Nullable Map<String, Object> params) {
return new MyScript();
}
@Override
public boolean needsScores() {
return true;
}
}
static class MyScript extends AbstractDoubleSearchScript implements ExplainableSearchScript, ExecutableScript {

View File

@ -102,7 +102,6 @@ import org.elasticsearch.index.fielddata.FieldDataType;
import org.elasticsearch.index.mapper.DocumentMapper;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.MappedFieldType.Loading;
import org.elasticsearch.index.mapper.internal.SizeFieldMapper;
import org.elasticsearch.index.mapper.internal.TimestampFieldMapper;
import org.elasticsearch.index.shard.MergePolicyConfig;
import org.elasticsearch.index.translog.Translog;
@ -357,11 +356,6 @@ public abstract class ESIntegTestCase extends ESTestCase {
.field("enabled", randomBoolean());
mappings.endObject();
}
if (randomBoolean()) {
mappings.startObject(SizeFieldMapper.NAME)
.field("enabled", randomBoolean())
.endObject();
}
mappings.startArray("dynamic_templates")
.startObject()
.startObject("template-strings")

View File

@ -74,6 +74,10 @@ public class UpdateByNativeScriptIT extends ESIntegTestCase {
public ExecutableScript newScript(@Nullable Map<String, Object> params) {
return new CustomScript(params);
}
@Override
public boolean needsScores() {
return false;
}
}
static class CustomScript extends AbstractExecutableScript {

View File

@ -53,7 +53,7 @@ Response:
}
--------------------------------------------------
The specified field must be of type `geo_point` (which can only be set explicitly in the mappings). And it can also hold an array of `geo_point` fields, in which case all will be taken into account during aggregation. The origin point can accept all formats supported by the `geo_point` <<mapping-geo-point-type,type>>:
The specified field must be of type `geo_point` (which can only be set explicitly in the mappings). And it can also hold an array of `geo_point` fields, in which case all will be taken into account during aggregation. The origin point can accept all formats supported by the <<geo-point,`geo_point` type>>:
* Object format: `{ "lat" : 52.3760, "lon" : 4.894 }` - this is the safest format as it is the most explicit about the `lat` & `lon` values
* String format: `"52.3760, 4.894"` - where the first number is the `lat` and the second is the `lon`

View File

@ -200,7 +200,7 @@ and therefore can't be used in the `order` option of the `terms` aggregator.
If the `top_hits` aggregator is wrapped in a `nested` or `reverse_nested` aggregator then nested hits are being returned.
Nested hits are in a sense hidden mini documents that are part of regular document where in the mapping a nested field type
has been configured. The `top_hits` aggregator has the ability to un-hide these documents if it is wrapped in a `nested`
or `reverse_nested` aggregator. Read more about nested in the <<mapping-nested-type,nested type mapping>>.
or `reverse_nested` aggregator. Read more about nested in the <<nested,nested type mapping>>.
If the nested type has been configured, a single document is actually indexed as multiple Lucene documents which share
the same id. In order to determine the identity of a nested hit, more is needed than just the id, so that is why

View File

@ -152,6 +152,34 @@ being consumed by a monitoring tool, rather than intended for human
consumption. The default for the `human` flag is
`false`.
[[date-math]]
[float]
=== Date Math
Most parameters which accept a formatted date value -- such as `gt` and `lt`
in <<query-dsl-range-query,`range` queries>>, or `from` and `to`
in <<search-aggregations-bucket-daterange-aggregation,`daterange`
aggregations>> -- understand date maths.
The expression starts with an anchor date, which can either be `now`, or a
date string ending with `||`. This anchor date can optionally be followed by
one or more maths expressions:
* `+1h` - add one hour
* `-1d` - subtract one day
* `/d` - round down to the nearest day
The supported <<time-units,time units>> are: `y` (year), `M` (month), `w` (week),
`d` (day), `h` (hour), `m` (minute), and `s` (second).
Some examples are:
[horizontal]
`now+1h`:: The current time plus one hour, with ms resolution.
`now+1h+1m`:: The current time plus one hour plus one minute, with ms resolution.
`now+1h/d`:: The current time plus one hour, rounded down to the nearest day.
`2015-01-01||+1M/d`:: `2015-01-01` plus one month, rounded down to the nearest day.
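For example, a minimal sketch of a `range` query that only matches documents
from the previous day (the index and the `timestamp` field name are purely
illustrative):
[source,js]
--------------------------------------------------
GET my_index/_search
{
  "query": {
    "range": {
      "timestamp": {
        "gte": "now-1d/d", <1>
        "lt":  "now/d" <2>
      }
    }
  }
}
--------------------------------------------------
// AUTOSENSE
<1> One day ago, rounded down to the nearest day.
<2> The current time, rounded down to the nearest day.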
[float]
=== Response Filtering
@ -237,10 +265,10 @@ curl 'localhost:9200/_segments?pretty&filter_path=indices.**.version'
--------------------------------------------------
Note that elasticsearch sometimes returns directly the raw value of a field,
like the `_source` field. If you want to filter _source fields, you should
like the `_source` field. If you want to filter `_source` fields, you should
consider combining the already existing `_source` parameter (see
<<get-source-filtering,Get API>> for more details) with the `filter_path`
parameter like this:
parameter like this:
[source,sh]
--------------------------------------------------
@ -318,8 +346,9 @@ of supporting the native JSON number types.
[float]
=== Time units
Whenever durations need to be specified, eg for a `timeout` parameter, the duration
can be specified as a whole number representing time in milliseconds, or as a time value like `2d` for 2 days. The supported units are:
Whenever durations need to be specified, eg for a `timeout` parameter, the
duration must specify the unit, like `2d` for 2 days. The supported units
are:
[horizontal]
`y`:: Year
@ -329,6 +358,7 @@ can be specified as a whole number representing time in milliseconds, or as a ti
`h`:: Hour
`m`:: Minute
`s`:: Second
`ms`:: Millisecond
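For example (a minimal sketch), an explicit unit can be passed wherever a
duration is expected, such as the `timeout` parameter of the cluster health API:
[source,js]
--------------------------------------------------
GET _cluster/health?timeout=10s
--------------------------------------------------
// AUTOSENSE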
[[distance-units]]
[float]

View File

@ -6,53 +6,3 @@ added to an index either when creating it or by using the put mapping
api. It also handles the dynamic mapping support for types that have no
explicit mappings pre defined. For more information about mapping
definitions, check out the <<mapping,mapping section>>.
[float]
=== Dynamic Mappings
New types and new fields within types can be added dynamically just
by indexing a document. When Elasticsearch encounters a new type,
it creates the type using the `_default_` mapping (see below).
When it encounters a new field within a type, it autodetects the
datatype that the field contains and adds it to the type mapping
automatically.
See <<mapping-dynamic-mapping>> for details of how to control and
configure dynamic mapping.
[float]
=== Default Mapping
When a new type is created (at <<indices-create-index,index creation>> time,
using the <<indices-put-mapping,`put-mapping` API>> or just by indexing a
document into it), the type uses the `_default_` mapping as its basis. Any
mapping specified in the <<indices-create-index,`create-index`>> or
<<indices-put-mapping,`put-mapping`>> request override values set in the
`_default_` mapping.
The default mapping definition is a plain mapping definition that is
embedded within Elasticsearch:
[source,js]
--------------------------------------------------
{
_default_ : {
}
}
--------------------------------------------------
Pretty short, isn't it? Basically, everything is `_default_`ed, including the
dynamic nature of the root object mapping which allows new fields to be added
automatically.
The default mapping can be overridden by specifying the `_default_` type when
creating a new index.
[float]
=== Mapper settings
`index.mapper.dynamic` (_dynamic_)::
Dynamic creation of mappings for unmapped types can be completely
disabled by setting `index.mapper.dynamic` to `false`.

View File

@ -6,8 +6,8 @@ are scored. Similarity is per field, meaning that via the mapping one
can define a different similarity per field.
Configuring a custom similarity is considered an expert feature and the
builtin similarities are most likely sufficient as is described in the
<<mapping-core-types,mapping section>>
builtin similarities are most likely sufficient as is described in
<<similarity>>.
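For example, here is a minimal sketch of selecting one of the builtin
similarities for a single field via the mapping (the index, type, and field
names are illustrative):
[source,js]
--------------------------------------------------
PUT my_index
{
  "mappings": {
    "my_type": {
      "properties": {
        "title": {
          "type": "string",
          "similarity": "BM25" <1>
        }
      }
    }
  }
}
--------------------------------------------------
// AUTOSENSE
<1> The `title` field uses the builtin `BM25` similarity instead of the default.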
[float]
[[configuration]]
@ -90,7 +90,7 @@ Type name: `BM25`
==== DFR similarity
Similarity that implements the
http://lucene.apache.org/core/4_1_0/core/org/apache/lucene/search/similarities/DFRSimilarity.html[divergence
http://lucene.apache.org/core/5_2_1/core/org/apache/lucene/search/similarities/DFRSimilarity.html[divergence
from randomness] framework. This similarity has the following options:
[horizontal]
@ -111,7 +111,7 @@ Type name: `DFR`
[[ib]]
==== IB similarity.
http://lucene.apache.org/core/4_1_0/core/org/apache/lucene/search/similarities/IBSimilarity.html[Information
http://lucene.apache.org/core/5_2_1/core/org/apache/lucene/search/similarities/IBSimilarity.html[Information
based model] . This similarity has the following options:
[horizontal]
@ -125,7 +125,7 @@ Type name: `IB`
[[lm_dirichlet]]
==== LM Dirichlet similarity.
http://lucene.apache.org/core/4_7_1/core/org/apache/lucene/search/similarities/LMDirichletSimilarity.html[LM
http://lucene.apache.org/core/5_2_1/core/org/apache/lucene/search/similarities/LMDirichletSimilarity.html[LM
Dirichlet similarity] . This similarity has the following options:
[horizontal]
@ -137,7 +137,7 @@ Type name: `LMDirichlet`
[[lm_jelinek_mercer]]
==== LM Jelinek Mercer similarity.
http://lucene.apache.org/core/4_7_1/core/org/apache/lucene/search/similarities/LMJelinekMercerSimilarity.html[LM
http://lucene.apache.org/core/5_2_1/core/org/apache/lucene/search/similarities/LMJelinekMercerSimilarity.html[LM
Jelinek Mercer similarity] . This similarity has the following options:
[horizontal]

View File

@ -3,76 +3,173 @@
[partintro]
--
Mapping is the process of defining how a document should be mapped to
the Search Engine, including its searchable characteristics such as
which fields are searchable and if/how they are tokenized. In
Elasticsearch, an index may store documents of different "mapping
types". Elasticsearch allows one to associate multiple mapping
definitions for each mapping type.
Explicit mapping is defined on an index/type level. By default, there
isn't a need to define an explicit mapping, since one is automatically
created and registered when a new type or new field is introduced (with
no performance overhead) and has sensible defaults. Only when the
defaults need to be overridden must a mapping definition be provided.
Mapping is the process of defining how a document, and the fields it contains,
are stored and indexed. For instance, use mappings to define:
* which string fields should be treated as full text fields.
* which fields contain numbers, dates, or geolocations.
* whether the values of all fields in the document should be
indexed into the catch-all <<mapping-all-field,`_all`>> field.
* the <<mapping-date-format,format>> of date values.
* custom rules to control the mapping for
<<dynamic-mapping,dynamically added fields>>.
[float]
[[all-mapping-types]]
=== Mapping Types
[[mapping-type]]
== Mapping Types
Mapping types are a way to divide the documents in an index into logical
groups. Think of it as tables in a database. Though there is separation
between types, it's not a full separation (all end up as a document
within the same Lucene index).
Each index has one or more _mapping types_, which are used to divide the
documents in an index into logical groups. User documents might be stored in a
`user` type, and blog posts in a `blogpost` type.
Field names with the same name across types are highly recommended to
have the same type and same mapping characteristics (analysis settings
for example). There is an effort to allow to explicitly "choose" which
field to use by using type prefix (`my_type.my_field`), but it's not
complete, and there are places where it will never work (like
aggregations on the field).
Each mapping type has:
<<mapping-fields,Meta-fields>>::
Meta-fields are used to customize how a document's associated metadata is
treated. Examples of meta-fields include the document's
<<mapping-index-field,`_index`>>, <<mapping-type-field,`_type`>>,
<<mapping-id-field,`_id`>>, and <<mapping-source-field,`_source`>> fields.
<<mapping-types,Fields>> or _properties_::
Each mapping type contains a list of fields or `properties` pertinent to that
type. A `user` type might contain `title`, `name`, and `age` fields, while a
`blogpost` type might contain `title`, `body`, `user_id` and `created` fields.
Fields with the same name in different mapping types in the same index
<<field-conflicts,must have the same mapping>>.
In practice though, this restriction is almost never an issue. The field
name usually ends up being a good indication of its "typeness" (e.g.
"first_name" will always be a string). Note also that this does not
apply to the cross-index case.
[float]
[[mapping-api]]
=== Mapping API
== Field datatypes
To create a mapping, you will need the <<indices-put-mapping,Put Mapping
API>>, or you can add multiple mappings when you <<indices-create-index,create an
index>>.
Each field has a data `type` which can be:
* a simple type like <<string,`string`>>, <<date,`date`>>, <<number,`long`>>,
<<number,`double`>>, <<boolean,`boolean`>> or <<ip,`ip`>>.
* a type which supports the hierarchical nature of JSON such as
<<object,`object`>> or <<nested,`nested`>>.
* or a specialised type like <<geo-point,`geo_point`>>,
<<geo-shape,`geo_shape`>>, or <<search-suggesters-completion,`completion`>>.
It is often useful to index the same field in different ways for different
purposes. For instance, a `string` field could be <<mapping-index,indexed>> as
an `analyzed` field for full-text search, and as a `not_analyzed` field for
sorting or aggregations. Alternatively, you could index a string field with
the <<analysis-standard-analyzer,`standard` analyzer>>, the
<<english-analyzer,`english`>> analyzer, and the
<<french-analyzer,`french` analyzer>>.
This is the purpose of _multi-fields_. Most datatypes support multi-fields
via the <<multi-fields>> parameter.
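For example, here is a minimal sketch (with illustrative index, type, and field
names) of a `string` field indexed both as an `analyzed` field and as a
`not_analyzed` `raw` sub-field:
[source,js]
--------------------------------------------------
PUT my_index
{
  "mappings": {
    "my_type": {
      "properties": {
        "city": {
          "type": "string", <1>
          "fields": {
            "raw": {
              "type": "string",
              "index": "not_analyzed" <2>
            }
          }
        }
      }
    }
  }
}
--------------------------------------------------
// AUTOSENSE
<1> The `city` field is `analyzed` and can be used for full-text search.
<2> The `city.raw` sub-field is `not_analyzed` and can be used for sorting or aggregations.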
[float]
[[mapping-settings]]
=== Global Settings
== Dynamic mapping
Fields and mapping types do not need to be defined before being used. Thanks
to _dynamic mapping_, new mapping types and new field names will be added
automatically, just by indexing a document. New fields can be added both to
the top-level mapping type, and to inner <<object,`object`>> and
<<nested,`nested`>> fields.
The
<<dynamic-mapping,dynamic mapping>> rules can be configured to
customise the mapping that is used for new types and new fields.
[float]
== Explicit mappings
You know more about your data than Elasticsearch can guess, so while dynamic
mapping can be useful to get started, at some point you will want to specify
your own explicit mappings.
You can create mapping types and field mappings when you
<<indices-create-index,create an index>>, and you can add mapping types and
fields to an existing index with the <<indices-put-mapping,PUT mapping API>>.
[float]
== Updating existing mappings
Other than where documented, *existing type and field mappings cannot be
updated*. Changing the mapping would mean invalidating already indexed
documents. Instead, you should create a new index with the correct mappings
and reindex your data into that index.
[[field-conflicts]]
[float]
== Fields are shared across mapping types
Mapping types are used to group fields, but the fields in each mapping type
are not independent of each other. Fields with:
* the _same name_
* in the _same index_
* in _different mapping types_
* map to the _same field_ internally,
* and *must have the same mapping*.
If a `title` field exists in both the `user` and `blogpost` mapping types, the
`title` fields must have exactly the same mapping in each type. The only
exceptions to this rule are the <<copy-to>>, <<dynamic>>, <<enabled>>,
<<ignore-above>>, <<include-in-all>>, and <<properties>> parameters, which may
have different settings per field.
Usually, fields with the same name also contain the same type of data, so
having the same mapping is not a problem. When conflicts do arise, these can
be solved by choosing more descriptive names, such as `user_title` and
`blog_title`.
[float]
== Example mapping
A mapping for the example described above could be specified when creating the
index, as follows:
[source,js]
---------------------------------------
PUT my_index <1>
{
"mappings": {
"user": { <2>
"_all": { "enabled": false }, <3>
"properties": { <4>
"title": { "type": "string" }, <5>
"name": { "type": "string" }, <5>
"age": { "type": "integer" } <5>
}
},
"blogpost": { <2>
"properties": { <4>
"title": { "type": "string" }, <5>
"body": { "type": "string" }, <5>
"user_id": {
"type": "string", <5>
"index": "not_analyzed"
},
"created": {
"type": "date", <5>
"format": "strict_date_optional_time||epoch_millis"
}
}
}
}
}
---------------------------------------
// AUTOSENSE
<1> Create an index called `my_index`.
<2> Add mapping types called `user` and `blogpost`.
<3> Disable the `_all` <<mapping-fields,meta field>> for the `user` mapping type.
<4> Specify fields or _properties_ in each mapping type.
<5> Specify the data `type` and mapping for each field.
The `index.mapping.ignore_malformed` global setting can be set on the
index level to ignore malformed content globally across all
mapping types (an example of malformed content is trying to index a text string
value as a numeric type).
The `index.mapping.coerce` global setting can be set on the
index level to coerce numeric content globally across all
mapping types. The default setting is `true`: coercion attempts to convert
strings containing numbers into numeric types, and to convert numeric values
with fractions into integer/short/long values by dropping the fraction part.
When the permitted conversions fail, the value is considered
malformed and the `ignore_malformed` setting dictates what happens next.
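A minimal sketch of what this might look like at index creation time, assuming
both settings are available as index-level settings in your version:
[source,js]
--------------------------------------------------
PUT my_index
{
  "settings": {
    "index.mapping.ignore_malformed": true, <1>
    "index.mapping.coerce": false <2>
  }
}
--------------------------------------------------
// AUTOSENSE
<1> Malformed values are ignored rather than causing indexing of the whole document to fail.
<2> Coercion of numeric content is disabled for this index.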
--
include::mapping/fields.asciidoc[]
include::mapping/types.asciidoc[]
include::mapping/date-format.asciidoc[]
include::mapping/fields.asciidoc[]
include::mapping/fielddata_formats.asciidoc[]
include::mapping/params.asciidoc[]
include::mapping/dynamic-mapping.asciidoc[]
include::mapping/meta.asciidoc[]
include::mapping/transform.asciidoc[]

View File

@ -1,238 +0,0 @@
[[mapping-date-format]]
== Date Format
In JSON documents, dates are represented as strings. Elasticsearch uses a set
of pre-configured formats to recognize and convert those, but you can change the
defaults by specifying the `format` option when defining a `date` type, or by
specifying `dynamic_date_formats` in the `root object` mapping (which will
be used unless explicitly overridden by a `date` type). There are built-in
formats supported, as well as completely custom ones.
The parsing of dates uses http://www.joda.org/joda-time/[Joda]. The
default date parsing used if no format is specified is
http://www.joda.org/joda-time/apidocs/org/joda/time/format/ISODateTimeFormat.html#dateOptionalTimeParser--[ISODateTimeFormat.dateOptionalTimeParser].
An extension to the format allows defining several formats using the `||`
separator. This allows defining less strict formats that can be used;
for example, the `yyyy/MM/dd HH:mm:ss||yyyy/MM/dd` format will parse
both `yyyy/MM/dd HH:mm:ss` and `yyyy/MM/dd`. The first format will also
act as the one that converts back from milliseconds to a string
representation.
[float]
[[date-math]]
=== Date Math
The `date` type supports using date math expressions when using it in a
query/filter (mainly makes sense in a `range` query/filter).
The expression starts with an "anchor" date, which can be either `now`
or a date string (in the applicable format) ending with `||`. It can
then be followed by a math expression, supporting `+`, `-` and `/`
(rounding). The units supported are `y` (year), `M` (month), `w` (week),
`d` (day), `h` (hour), `m` (minute), and `s` (second).
Here are some samples: `now+1h`, `now+1h+1m`, `now+1h/d`,
`2012-01-01||+1M/d`.
When doing `range` type searches with rounding, the value parsed
depends on whether the end of the range is inclusive or exclusive, and
whether the beginning or end of the range. Rounding up moves to the
last millisecond of the rounding scope, and rounding down to the
first millisecond of the rounding scope. The semantics work as follows:
* `gt` - round up, and use > that value (`2014-11-18||/M` becomes `2014-11-30T23:59:59.999`, ie excluding the entire month)
* `gte` - round down, and use >= that value (`2014-11-18||/M` becomes `2014-11-01`, ie including the entire month)
* `lt` - round down, and use < that value (`2014-11-18||/M` becomes `2014-11-01`, ie excluding the entire month)
* `lte` - round up, and use <= that value (`2014-11-18||/M` becomes `2014-11-30T23:59:59.999`, ie including the entire month)
[float]
[[built-in]]
=== Built In Formats
Most of the date formats below have a `strict` companion format, which means that
the year, month and day parts of the date must have leading zeros in order
to be valid. This means that a date like `5/11/1` would not be valid;
you would need to specify the full date, which would be `2005/11/01` in this
example. So instead of `date_optional_time` you would need to specify
`strict_date_optional_time`.
The following tables lists all the defaults ISO formats supported:
[cols="<,<",options="header",]
|=======================================================================
|Name |Description
|`basic_date`|A basic formatter for a full date as four digit year, two
digit month of year, and two digit day of month (yyyyMMdd).
|`basic_date_time`|A basic formatter that combines a basic date and time,
separated by a 'T' (yyyyMMdd'T'HHmmss.SSSZ).
|`basic_date_time_no_millis`|A basic formatter that combines a basic date
and time without millis, separated by a 'T' (yyyyMMdd'T'HHmmssZ).
|`basic_ordinal_date`|A formatter for a full ordinal date, using a four
digit year and three digit dayOfYear (yyyyDDD).
|`basic_ordinal_date_time`|A formatter for a full ordinal date and time,
using a four digit year and three digit dayOfYear
(yyyyDDD'T'HHmmss.SSSZ).
|`basic_ordinal_date_time_no_millis`|A formatter for a full ordinal date
and time without millis, using a four digit year and three digit
dayOfYear (yyyyDDD'T'HHmmssZ).
|`basic_time`|A basic formatter for a two digit hour of day, two digit
minute of hour, two digit second of minute, three digit millis, and time
zone offset (HHmmss.SSSZ).
|`basic_time_no_millis`|A basic formatter for a two digit hour of day,
two digit minute of hour, two digit second of minute, and time zone
offset (HHmmssZ).
|`basic_t_time`|A basic formatter for a two digit hour of day, two digit
minute of hour, two digit second of minute, three digit millis, and time
zone off set prefixed by 'T' ('T'HHmmss.SSSZ).
|`basic_t_time_no_millis`|A basic formatter for a two digit hour of day,
two digit minute of hour, two digit second of minute, and time zone
offset prefixed by 'T' ('T'HHmmssZ).
|`basic_week_date`|A basic formatter for a full date as four digit
weekyear, two digit week of weekyear, and one digit day of week
(xxxx'W'wwe). `strict_basic_week_date` is supported.
|`basic_week_date_time`|A basic formatter that combines a basic weekyear
date and time, separated by a 'T' (xxxx'W'wwe'T'HHmmss.SSSZ).
`strict_basic_week_date_time` is supported.
|`basic_week_date_time_no_millis`|A basic formatter that combines a basic
weekyear date and time without millis, separated by a 'T'
(xxxx'W'wwe'T'HHmmssZ). `strict_week_date_time` is supported.
|`date`|A formatter for a full date as four digit year, two digit month
of year, and two digit day of month (yyyy-MM-dd). `strict_date` is supported.
|`date_hour`|A formatter that combines a full date and two digit hour of
day. `strict_date_hour` is supported.
|`date_hour_minute`|A formatter that combines a full date, two digit hour
of day, and two digit minute of hour. `strict_date_hour_minute` is supported.
|`date_hour_minute_second`|A formatter that combines a full date, two
digit hour of day, two digit minute of hour, and two digit second of
minute. `strict_date_hour_minute_second` is supported.
|`date_hour_minute_second_fraction`|A formatter that combines a full
date, two digit hour of day, two digit minute of hour, two digit second
of minute, and three digit fraction of second
(yyyy-MM-dd'T'HH:mm:ss.SSS). `strict_date_hour_minute_second_fraction` is supported.
|`date_hour_minute_second_millis`|A formatter that combines a full date,
two digit hour of day, two digit minute of hour, two digit second of
minute, and three digit fraction of second (yyyy-MM-dd'T'HH:mm:ss.SSS).
`strict_date_hour_minute_second_millis` is supported.
|`date_optional_time`|a generic ISO datetime parser where the date is
mandatory and the time is optional. `strict_date_optional_time` is supported.
|`date_time`|A formatter that combines a full date and time, separated by
a 'T' (yyyy-MM-dd'T'HH:mm:ss.SSSZZ). `strict_date_time` is supported.
|`date_time_no_millis`|A formatter that combines a full date and time
without millis, separated by a 'T' (yyyy-MM-dd'T'HH:mm:ssZZ).
`strict_date_time_no_millis` is supported.
|`hour`|A formatter for a two digit hour of day. `strict_hour` is supported.
|`hour_minute`|A formatter for a two digit hour of day and two digit
minute of hour. `strict_hour_minute` is supported.
|`hour_minute_second`|A formatter for a two digit hour of day, two digit
minute of hour, and two digit second of minute.
`strict_hour_minute_second` is supported.
|`hour_minute_second_fraction`|A formatter for a two digit hour of day,
two digit minute of hour, two digit second of minute, and three digit
fraction of second (HH:mm:ss.SSS).
`strict_hour_minute_second_fraction` is supported.
|`hour_minute_second_millis`|A formatter for a two digit hour of day, two
digit minute of hour, two digit second of minute, and three digit
fraction of second (HH:mm:ss.SSS).
`strict_hour_minute_second_millis` is supported.
|`ordinal_date`|A formatter for a full ordinal date, using a four digit
year and three digit dayOfYear (yyyy-DDD). `strict_ordinal_date` is supported.
|`ordinal_date_time`|A formatter for a full ordinal date and time, using
a four digit year and three digit dayOfYear (yyyy-DDD'T'HH:mm:ss.SSSZZ).
`strict_ordinal_date_time` is supported.
|`ordinal_date_time_no_millis`|A formatter for a full ordinal date and
time without millis, using a four digit year and three digit dayOfYear
(yyyy-DDD'T'HH:mm:ssZZ).
`strict_ordinal_date_time_no_millis` is supported.
|`time`|A formatter for a two digit hour of day, two digit minute of
hour, two digit second of minute, three digit fraction of second, and
time zone offset (HH:mm:ss.SSSZZ). `strict_time` is supported.
|`time_no_millis`|A formatter for a two digit hour of day, two digit
minute of hour, two digit second of minute, and time zone offset
(HH:mm:ssZZ). `strict_time_no_millis` is supported.
|`t_time`|A formatter for a two digit hour of day, two digit minute of
hour, two digit second of minute, three digit fraction of second, and
time zone offset prefixed by 'T' ('T'HH:mm:ss.SSSZZ).
`strict_t_time` is supported.
|`t_time_no_millis`|A formatter for a two digit hour of day, two digit
minute of hour, two digit second of minute, and time zone offset
prefixed by 'T' ('T'HH:mm:ssZZ). `strict_t_time_no_millis` is supported.
|`week_date`|A formatter for a full date as four digit weekyear, two
digit week of weekyear, and one digit day of week (xxxx-'W'ww-e).
`strict_week_date` is supported.
|`week_date_time`|A formatter that combines a full weekyear date and
time, separated by a 'T' (xxxx-'W'ww-e'T'HH:mm:ss.SSSZZ).
`strict_week_date_time` is supported.
|`week_date_time_no_millis`|A formatter that combines a full weekyear date
and time without millis, separated by a 'T' (xxxx-'W'ww-e'T'HH:mm:ssZZ).
`strict_week_date_time` is supported.
|`weekyear`|A formatter for a four digit weekyear. `strict_week_year` is supported.
|`weekyear_week`|A formatter for a four digit weekyear and two digit week
of weekyear. `strict_weekyear_week` is supported.
|`weekyear_week_day`|A formatter for a four digit weekyear, two digit week
of weekyear, and one digit day of week. `strict_weekyear_week_day` is supported.
|`year`|A formatter for a four digit year. `strict_year` is supported.
|`year_month`|A formatter for a four digit year and two digit month of
year. `strict_year_month` is supported.
|`year_month_day`|A formatter for a four digit year, two digit month of
year, and two digit day of month. `strict_year_month_day` is supported.
|`epoch_second`|A formatter for the number of seconds since the epoch.
Note that this timestamp allows a max length of 10 chars, so dates
before 1653 and after 2286 are not supported. You should use a different
date formatter in that case.
|`epoch_millis`|A formatter for the number of milliseconds since the epoch.
Note that this timestamp allows a max length of 13 chars, so dates
before 1653 and after 2286 are not supported. You should use a different
date formatter in that case.
|=======================================================================
[float]
[[custom]]
=== Custom Format
Allows for a completely customizable date format explained
http://www.joda.org/joda-time/apidocs/org/joda/time/format/DateTimeFormat.html[here].

View File

@ -1,73 +1,67 @@
[[mapping-dynamic-mapping]]
[[dynamic-mapping]]
== Dynamic Mapping
Default mappings allow generic mapping definitions to be automatically applied
to types that do not have mappings predefined. This is mainly done
thanks to the fact that the
<<mapping-object-type,object mapping>> and
in particular the <<mapping-root-object-type,root
object mapping>> allow for schema-less dynamic addition of unmapped
fields.
The default mapping definition is a plain mapping definition that is
embedded within the distribution:
One of the most important features of Elasticsearch is that it tries to get
out of your way and let you start exploring your data as quickly as possible.
To index a document, you don't have to first create an index, define a mapping
type, and define your fields -- you can just index a document and the index,
type, and fields will spring to life automatically:
[source,js]
--------------------------------------------------
{
"_default_" : {
}
}
PUT data/counters/1 <1>
{ "count": 5 }
--------------------------------------------------
// AUTOSENSE
<1> Creates the `data` index, the `counters` mapping type, and a field
called `count` with datatype `long`.
Pretty short, isn't it? Basically, everything is defaulted, especially the
dynamic nature of the root object mapping. The default mapping can be
overridden by specifying the `_default_` type when creating a new index.
The automatic detection and addition of new types and fields is called
_dynamic mapping_. The dynamic mapping rules can be customised to suit your
purposes with:
The dynamic creation of mappings for unmapped types can be completely
disabled by setting `index.mapper.dynamic` to `false`.
<<default-mapping,`_default_` mapping>>::
The dynamic creation of fields within a type can be completely
disabled by setting the `dynamic` property of the type to `strict`.
Configure the base mapping to be used for new mapping types.
Here is a <<indices-put-mapping,Put Mapping>> example that
disables dynamic field creation for a `tweet`:
<<dynamic-field-mapping,Dynamic field mappings>>::
[source,js]
--------------------------------------------------
$ curl -XPUT 'http://localhost:9200/twitter/_mapping/tweet' -d '
{
"tweet" : {
"dynamic": "strict",
"properties" : {
"message" : {"type" : "string", "store" : true }
}
}
}
'
--------------------------------------------------
The rules governing dynamic field detection.
Here is how we can change the default
<<mapping-date-format,date_formats>> used in the
root and inner object types:
<<dynamic-templates,Dynamic templates>>::
Custom rules to configure the mapping for dynamically added fields.
TIP: <<indices-templates,Index templates>> allow you to configure the default
mappings, settings, aliases, and warmers for new indices, whether created
automatically or explicitly.
[source,js]
--------------------------------------------------
{
"_default_" : {
"dynamic_date_formats" : ["yyyy-MM-dd", "dd-MM-yyyy", "date_optional_time"]
}
}
--------------------------------------------------
[float]
=== Unmapped fields in queries
=== Disabling automatic type creation
Queries and filters can refer to fields that don't exist in a mapping. Whether this
is allowed is controlled by the `index.query.parse.allow_unmapped_fields` setting.
This setting defaults to `true`. Setting it to `false` will disallow the usage of
unmapped fields in queries.
Automatic type creation can be disabled by setting the `index.mapper.dynamic`
setting to `false`, either by setting the default value in the
`config/elasticsearch.yml` file, or per-index as an index setting:
[source,js]
--------------------------------------------------
PUT /_settings <1>
{
"index.mapper.dynamic":false
}
--------------------------------------------------
// AUTOSENSE
<1> Disable automatic type creation for all indices.
Regardless of the value of this setting, types can still be added explicitly
when <<indices-create-index,creating an index>> or with the
<<indices-put-mapping,PUT mapping>> API.
include::dynamic/default-mapping.asciidoc[]
include::dynamic/field-mapping.asciidoc[]
include::dynamic/templates.asciidoc[]
When registering a new <<search-percolate,percolator query>> or creating
a <<filtered,filtered alias>> then the `index.query.parse.allow_unmapped_fields` setting
is forcefully overwritten to disallow unmapped fields.

View File

@ -0,0 +1,82 @@
[[default-mapping]]
=== `_default_` mapping
The default mapping, which will be used as the base mapping for any new
mapping types, can be customised by adding a mapping type with the name
`_default_` to an index, either when
<<indices-create-index,creating the index>> or later on with the
<<indices-put-mapping,PUT mapping>> API.
[source,js]
--------------------------------------------------
PUT my_index
{
"mappings": {
"_default_": { <1>
"_all": {
"enabled": false
}
},
"user": {}, <2>
"blogpost": { <3>
"_all": {
"enabled": true
}
}
}
}
--------------------------------------------------
// AUTOSENSE
<1> The `_default_` mapping defaults the <<mapping-all-field,`_all`>> field to disabled.
<2> The `user` type inherits the settings from `_default_`.
<3> The `blogpost` type overrides the defaults and enables the <<mapping-all-field,`_all`>> field.
While the `_default_` mapping can be updated after an index has been created,
the new defaults will only affect mapping types that are created afterwards.
The `_default_` mapping can be used in conjunction with
<<indices-templates,Index templates>> to control dynamically created types
within automatically created indices:
[source,js]
--------------------------------------------------
PUT _template/logging
{
"template": "logs-*", <1>
"settings": { "number_of_shards": 1 }, <2>
"mappings": {
"_default_": {
"_all": { <3>
"enabled": false
},
"dynamic_templates": [
{
"strings": { <4>
"match_mapping_type": "string",
"mapping": {
"type": "string",
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed",
"ignore_above": 256
}
}
}
}
}
]
}
}
}
PUT logs-2015.10.01/event/1
{ "message": "error:16" }
--------------------------------------------------
// AUTOSENSE
<1> The `logging` template will match any indices beginning with `logs-`.
<2> Matching indices will be created with a single primary shard.
<3> The `_all` field will be disabled by default for new type mappings.
<4> String fields will be created with an `analyzed` main field, and a `not_analyzed` `.raw` field.

View File

@ -0,0 +1,139 @@
[[dynamic-field-mapping]]
=== Dynamic field mapping
By default, when a previously unseen field is found in a document,
Elasticsearch will add the new field to the type mapping. This behaviour can
be disabled, both at the document and at the <<object,`object`>> level, by
setting the <<dynamic,`dynamic`>> parameter to `false` or to `strict`.
Assuming `dynamic` field mapping is enabled, some simple rules are used to
determine which datatype the field should have:
[horizontal]
*JSON datatype*:: *Elasticsearch datatype*
`null`:: No field is added.
`true` or `false`:: <<boolean,`boolean`>> field
floating{nbsp}point{nbsp}number:: <<number,`double`>> field
integer:: <<number,`long`>> field
object:: <<object,`object`>> field
array:: Depends on the first non-`null` value in the array.
string:: Either a <<date,`date`>> field
(if the value passes <<date-detection,date detection>>),
a <<number,`double`>> or <<number,`long`>> field
(if the value passes <<numeric-detection,numeric detection>>)
or an <<mapping-index,`analyzed`>> <<string,`string`>> field.
These are the only <<mapping-types,field datatypes>> that are dynamically
detected. All other datatypes must be mapped explicitly.
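To illustrate (a minimal sketch -- the index, type, and field names are purely
illustrative), indexing a document containing several JSON datatypes adds
fields according to the rules above:
[source,js]
--------------------------------------------------
PUT my_index/my_type/1
{
  "active": true, <1>
  "count": 5, <2>
  "price": 10.5, <3>
  "note": "some text" <4>
}
--------------------------------------------------
// AUTOSENSE
<1> Added as a <<boolean,`boolean`>> field.
<2> Added as a <<number,`long`>> field.
<3> Added as a <<number,`double`>> field.
<4> Added as an `analyzed` <<string,`string`>> field (it does not match the default date formats).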
Besides the options listed below, dynamic field mapping rules can be further
customised with <<dynamic-templates,`dynamic_templates`>>.
[[date-detection]]
==== Date detection
If `date_detection` is enabled (default), then new string fields are checked
to see whether their contents match any of the date patterns specified in
`dynamic_date_formats`. If a match is found, a new <<date,`date`>> field is
added with the corresponding format.
The default value for `dynamic_date_formats` is:
&#91; <<strict-date-time,`"strict_date_optional_time"`>>,`"yyyy/MM/dd HH:mm:ss Z||yyyy/MM/dd Z"`]
For example:
[source,js]
--------------------------------------------------
PUT my_index/my_type/1
{
"create_date": "2015/09/02"
}
GET my_index/_mapping <1>
--------------------------------------------------
// AUTOSENSE
<1> The `create_date` field has been added as a <<date,`date`>>
field with the <<mapping-date-format,`format`>>: +
`"yyyy/MM/dd HH:mm:ss Z||yyyy/MM/dd Z"`.
===== Disabling date detection
Dynamic date detection can be disabled by setting `date_detection` to `false`:
[source,js]
--------------------------------------------------
PUT my_index
{
"mappings": {
"my_type": {
"date_detection": false
}
}
}
PUT my_index/my_type/1 <1>
{
"create": "2015/09/02"
}
--------------------------------------------------
// AUTOSENSE
<1> The `create_date` field has been added as a <<string,`string`>> field.
===== Customising detected date formats
Alternatively, the `dynamic_date_formats` can be customised to support your
own <<mapping-date-format,date formats>>:
[source,js]
--------------------------------------------------
PUT my_index
{
"mappings": {
"my_type": {
"dynamic_date_formats": ["MM/dd/yyyy"]
}
}
}
PUT my_index/my_type/1
{
"create_date": "09/25/2015"
}
--------------------------------------------------
// AUTOSENSE
[[numeric-detection]]
==== Numeric detection
While JSON has support for native floating point and integer datatypes, some
applications or languages may sometimes render numbers as strings. Usually the
correct solution is to map these fields explicitly, but numeric detection
(which is disabled by default) can be enabled to do this automatically:
[source,js]
--------------------------------------------------
PUT my_index
{
"mappings": {
"my_type": {
"numeric_detection": true
}
}
}
PUT my_index/my_type/1
{
"my_float": "1.0", <1>
"my_integer": "1" <2>
}
--------------------------------------------------
// AUTOSENSE
<1> The `my_float` field is added as a <<number,`double`>> field.
<2> The `my_integer` field is added as a <<number,`long`>> field.

View File

@ -0,0 +1,251 @@
[[dynamic-templates]]
=== Dynamic templates
Dynamic templates allow you to define custom mappings that can be applied to
dynamically added fields based on:
* the <<dynamic-mapping,datatype>> detected by Elasticsearch, with <<match-mapping-type,`match_mapping_type`>>.
* the name of the field, with <<match-unmatch,`match` and `unmatch`>> or <<match-pattern,`match_pattern`>>.
* the full dotted path to the field, with <<path-match-unmatch,`path_match` and `path_unmatch`>>.
The original field name `{name}` and the detected datatype
`{dynamic_type}` <<template-variables,template variables>> can be used in
the mapping specification as placeholders.
IMPORTANT: Dynamic field mappings are only added when a field contains a
concrete value -- not `null` or an empty array. This means that if the
`null_value` option is used in a `dynamic_template`, it will only be applied
after the first document with a concrete value for the field has been
indexed.
Dynamic templates are specified as an array of named objects:
[source,js]
--------------------------------------------------
"dynamic_templates": [
{
"my_template_name": { <1>
... match conditions ... <2>
"mapping": { ... } <3>
}
},
...
]
--------------------------------------------------
<1> The template name can be any string value.
<2> The match conditions can include any of: `match_mapping_type`, `match`, `match_pattern`, `unmatch`, `path_match`, `path_unmatch`.
<3> The mapping that the matched field should use.
Templates are processed in order -- the first matching template wins. New
templates can be appended to the end of the list with the
<<indices-put-mapping,PUT mapping>> API. If a new template has the same
name as an existing template, it will replace the old version.
[[match-mapping-type]]
==== `match_mapping_type`
The `match_mapping_type` matches on the datatype detected by
<<dynamic-field-mapping,dynamic field mapping>>, in other words, the datatype
that Elasticsearch thinks the field should have. Only the following datatypes
can be automatically detected: `boolean`, `date`, `double`, `long`, `object`,
`string`. It also accepts `*` to match all datatypes.
For example, if we wanted to map all integer fields as `integer` instead of
`long`, and all `string` fields as both `analyzed` and `not_analyzed`, we
could use the following template:
[source,js]
--------------------------------------------------
PUT my_index
{
"mappings": {
"my_type": {
"dynamic_templates": [
{
"integers": {
"match_mapping_type": "long",
"mapping": {
"type": "integer"
}
}
},
{
"strings": {
"match_mapping_type": "string",
"mapping": {
"type": "string",
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed",
"ignore_above": 256
}
}
}
}
}
]
}
}
}
PUT my_index/my_type/1
{
"my_integer": 5, <1>
"my_string": "Some string" <2>
}
--------------------------------------------------
// AUTOSENSE
<1> The `my_integer` field is mapped as an `integer`.
<2> The `my_string` field is mapped as an analyzed `string`, with a `not_analyzed` <<multi-fields,multi field>>.
[[match-unmatch]]
==== `match` and `unmatch`
The `match` parameter uses a pattern to match on the fieldname, while
`unmatch` uses a pattern to exclude fields matched by `match`.
The following example matches all `string` fields whose name starts with
`long_` (except for those which end with `_text`) and maps them as `long`
fields:
[source,js]
--------------------------------------------------
PUT my_index
{
"mappings": {
"my_type": {
"dynamic_templates": [
{
"longs_as_strings": {
"match_mapping_type": "string",
"match": "long_*",
"unmatch": "*_text",
"mapping": {
"type": "long"
}
}
}
]
}
}
}
PUT my_index/my_type/1
{
"long_num": "5", <1>
"long_text": "foo" <2>
}
--------------------------------------------------
// AUTOSENSE
<1> The `long_num` field is mapped as a `long`.
<2> The `long_text` field uses the default `string` mapping.
[[match-pattern]]
==== `match_pattern`
The `match_pattern` parameter behaves just like the `match` parameter, but
supports full Java regular expression matching on the field name instead of
simple wildcards, for instance:
[source,js]
--------------------------------------------------
"match_pattern": "^profit_\d+$"
--------------------------------------------------
[[path-match-unmatch]]
==== `path_match` and `path_unmatch`
The `path_match` and `path_unmatch` parameters work in the same way as `match`
and `unmatch`, but operate on the full dotted path to the field, not just the
final name, e.g. `some_object.*.some_field`.
This example copies the values of any fields in the `name` object to the
top-level `full_name` field, except for the `middle` field:
[source,js]
--------------------------------------------------
PUT my_index
{
"mappings": {
"my_type": {
"dynamic_templates": [
{
"full_name": {
"path_match": "name.*",
"path_unmatch": "*.middle",
"mapping": {
"type": "string",
"copy_to": "full_name"
}
}
}
]
}
}
}
PUT my_index/my_type/1
{
"name": {
"first": "Alice",
"middle": "Mary",
"last": "White"
}
}
--------------------------------------------------
// AUTOSENSE
[[template-variables]]
==== `{name}` and `{dynamic_type}`
The `{name}` and `{dynamic_type}` placeholders are replaced in the `mapping`
with the field name and detected dynamic type. The following example sets all
string fields to use an <<analyzer,`analyzer`>> with the same name as the
field, and disables <<doc-values,`doc_values`>> for all non-string fields:
[source,js]
--------------------------------------------------
PUT my_index
{
"mappings": {
"my_type": {
"dynamic_templates": [
{
"named_analyzers": {
"match_mapping_type": "string",
"match": "*",
"mapping": {
"type": "string",
"analyzer": "{name}"
}
}
},
{
"no_doc_values": {
"match_mapping_type":"*",
"mapping": {
"type": "{dynamic_type}",
"doc_values": false
}
}
}
]
}
}
}
PUT my_index/my_type/1
{
"english": "Some English text", <1>
"count": 5 <2>
}
--------------------------------------------------
// AUTOSENSE
<1> The `english` field is mapped as a `string` field with the `english` analyzer.
<2> The `count` field is mapped as a `long` field with `doc_values` disabled.

View File

@ -1,257 +0,0 @@
[[fielddata-formats]]
== Fielddata formats
The field data format controls how field data should be stored.
Depending on the field type, there might be several field data types
available. In particular, string, geo-point and numeric types support the `doc_values`
format which allows for computing the field data data-structures at indexing
time and storing them on disk. Although it will make the index larger and may
be slightly slower, this implementation will be more near-realtime-friendly
and will require much less memory from the JVM than other implementations.
Here is an example of how to configure the `tag` field to use the `paged_bytes` field
data format.
[source,js]
--------------------------------------------------
{
"tag": {
"type": "string",
"fielddata": {
"format": "paged_bytes"
}
}
}
--------------------------------------------------
It is possible to change the field data format (and the field data settings
in general) on a live index by using the update mapping API.
[float]
=== String field data types
`paged_bytes` (default on analyzed string fields)::
Stores unique terms sequentially in a large buffer and maps documents to
the indices of the terms they contain in this large buffer.
`doc_values` (default when index is set to `not_analyzed`)::
Computes and stores field data data-structures on disk at indexing time.
Lowers memory usage but only works on non-analyzed strings (`index`: `no` or
`not_analyzed`).
[float]
=== Numeric field data types
`array`::
Stores field values in memory using arrays.
`doc_values` (default unless doc values are disabled)::
Computes and stores field data data-structures on disk at indexing time.
[float]
=== Geo point field data types
`array`::
Stores latitudes and longitudes in arrays.
`doc_values` (default unless doc values are disabled)::
Computes and stores field data data-structures on disk at indexing time.
[float]
[[global-ordinals]]
=== Global ordinals
Global ordinals is a data-structure on top of field data, that maintains an
incremental numbering for all the terms in field data in a lexicographic order.
Each term has a unique number and the number of term 'A' is lower than the number
of term 'B'. Global ordinals are only supported on string fields.
Field data on string fields also has ordinals, which is a unique numbering for all terms
in a particular segment and field. Global ordinals just build on top of this,
by providing a mapping between the segment ordinals and the global ordinals,
the latter being unique across the entire shard.
Global ordinals can be beneficial in search features that use segment ordinals already
such as the terms aggregator to improve the execution time. Often these search features
need to merge the segment ordinal results to a cross segment terms result. With
global ordinals this mapping happens during field data load time instead of during each
query execution. With global ordinals search features only need to resolve the actual
term when building the (shard) response, but during the execution there is no need
at all to use the actual terms and the unique numbering global ordinals provided is
sufficient and improves the execution time.
Global ordinals for a specified field are tied to all the segments of a shard (Lucene index),
whereas field data for a specific field is tied to a single segment.
For this reason global ordinals need to be rebuilt in their entirety once new segments
become visible. This one-time cost would happen anyway without global ordinals, but
then it would happen for each search execution instead!
The loading time of global ordinals depends on the number of terms in a field, but in general
it is low, since the source field data has already been loaded. The memory overhead of global
ordinals is small because it is very efficiently compressed. Eager loading of global ordinals
can move the loading time from the first search request to the refresh itself.
[float]
[[fielddata-loading]]
=== Fielddata loading
By default, field data is loaded lazily, ie. the first time that a query that
requires them is executed. However, this can make the first requests that
follow a merge operation quite slow since fielddata loading is a heavy
operation.
It is possible to force field data to be loaded and cached eagerly through the
`loading` setting of fielddata:
[source,js]
--------------------------------------------------
{
"category": {
"type": "string",
"fielddata": {
"loading": "eager"
}
}
}
--------------------------------------------------
Global ordinals can also be eagerly loaded:
[source,js]
--------------------------------------------------
{
"category": {
"type": "string",
"fielddata": {
"loading": "eager_global_ordinals"
}
}
}
--------------------------------------------------
With the above setting both field data and global ordinals for a specific field
are eagerly loaded.
[float]
==== Disabling field data loading
Field data can take a lot of RAM so it makes sense to disable field data
loading on the fields that don't need field data, for example those that are
used for full-text search only. In order to disable field data loading, just
change the field data format to `disabled`. When disabled, all requests that
will try to load field data, e.g. when they include aggregations and/or sorting,
will return an error.
[source,js]
--------------------------------------------------
{
"text": {
"type": "string",
"fielddata": {
"format": "disabled"
}
}
}
--------------------------------------------------
The `disabled` format is supported by all field types.
[float]
[[field-data-filtering]]
=== Filtering fielddata
It is possible to control which field values are loaded into memory,
which is particularly useful for string fields. When specifying the
<<mapping-core-types,mapping>> for a field, you
can also specify a fielddata filter.
Fielddata filters can be changed using the
<<indices-put-mapping,PUT mapping>>
API. After changing the filters, use the
<<indices-clearcache,Clear Cache>> API
to reload the fielddata using the new filters.
[float]
==== Filtering by frequency:
The frequency filter allows you to only load terms whose frequency falls
between a `min` and `max` value, which can be expressed as an absolute
number (when the number is bigger than 1.0) or as a percentage
(eg `0.01` is `1%` and `1.0` is `100%`). Frequency is calculated
*per segment*. Percentages are based on the number of docs which have a
value for the field, as opposed to all docs in the segment.
Small segments can be excluded completely by specifying the minimum
number of docs that the segment should contain with `min_segment_size`:
[source,js]
--------------------------------------------------
{
"tag": {
"type": "string",
"fielddata": {
"filter": {
"frequency": {
"min": 0.001,
"max": 0.1,
"min_segment_size": 500
}
}
}
}
}
--------------------------------------------------
[float]
==== Filtering by regex
Terms can also be filtered by regular expression - only values which
match the regular expression are loaded. Note: the regular expression is
applied to each term in the field, not to the whole field value. For
instance, to only load hashtags from a tweet, we can use a regular
expression which matches terms beginning with `#`:
[source,js]
--------------------------------------------------
{
"tweet": {
"type": "string",
"analyzer": "whitespace"
"fielddata": {
"filter": {
"regex": {
"pattern": "^#.*"
}
}
}
}
}
--------------------------------------------------
[float]
==== Combining filters
The `frequency` and `regex` filters can be combined:
[source,js]
--------------------------------------------------
{
"tweet": {
"type": "string",
"analyzer": "whitespace"
"fielddata": {
"filter": {
"regex": {
"pattern": "^#.*",
},
"frequency": {
"min": 0.001,
"max": 0.1,
"min_segment_size": 500
}
}
}
}
}
--------------------------------------------------

View File

@ -5,7 +5,8 @@ Each document has metadata associated with it, such as the `_index`, mapping
<<mapping-type-field,`_type`>>, and `_id` meta-fields. The behaviour of some of these meta-fields
can be customised when a mapping type is created.
The meta-fields are:
[float]
=== Identity meta-fields
[horizontal]
<<mapping-index-field,`_index`>>::
@ -18,16 +19,26 @@ The meta-fields are:
<<mapping-type-field,`_type`>>::
The document's <<all-mapping-types,mapping type>>.
The document's <<mapping-type,mapping type>>.
<<mapping-id-field,`_id`>>::
The document's ID.
[float]
=== Document source meta-fields
<<mapping-source-field,`_source`>>::
The original JSON representing the body of the document.
<<mapping-size-field,`_size`>>::
The size of the `_source` field in bytes.
[float]
=== Indexing meta-fields
<<mapping-all-field,`_all`>>::
A _catch-all_ field that indexes the values of all other fields.
@ -36,18 +47,6 @@ The meta-fields are:
All fields in the document which contain non-null values.
<<mapping-parent-field,`_parent`>>::
Used to create a parent-child relationship between two mapping types.
<<mapping-routing-field,`_routing`>>::
A custom routing value which routes a document to a particular shard.
<<mapping-size-field,`_size`>>::
The size of the `_source` field in bytes.
<<mapping-timestamp-field,`_timestamp`>>::
A timestamp associated with the document, either specified manually or auto-generated.
@ -56,27 +55,49 @@ The meta-fields are:
How long a document should live before it is automatically deleted.
include::fields/index-field.asciidoc[]
[float]
=== Routing meta-fields
include::fields/uid-field.asciidoc[]
<<mapping-parent-field,`_parent`>>::
include::fields/type-field.asciidoc[]
Used to create a parent-child relationship between two mapping types.
<<mapping-routing-field,`_routing`>>::
A custom routing value which routes a document to a particular shard.
[float]
=== Other meta-field
<<mapping-meta-field,`_meta`>>::
Application specific metadata.
include::fields/id-field.asciidoc[]
include::fields/source-field.asciidoc[]
include::fields/all-field.asciidoc[]
include::fields/field-names-field.asciidoc[]
include::fields/id-field.asciidoc[]
include::fields/index-field.asciidoc[]
include::fields/meta-field.asciidoc[]
include::fields/parent-field.asciidoc[]
include::fields/routing-field.asciidoc[]
include::fields/size-field.asciidoc[]
include::fields/source-field.asciidoc[]
include::fields/timestamp-field.asciidoc[]
include::fields/ttl-field.asciidoc[]
include::fields/type-field.asciidoc[]
include::fields/uid-field.asciidoc[]

View File

@ -151,82 +151,18 @@ PUT my_index
<1> The `_all` field is disabled for the `my_type` type.
<2> The `query_string` query will default to querying the `content` field in this index.
[[include-in-all]]
==== Including specific fields in `_all`
[[excluding-from-all]]
==== Excluding fields from `_all`
Individual fields can be included or excluded from the `_all` field with the
`include_in_all` setting, which defaults to `true`:
<<include-in-all,`include_in_all`>> setting.
[source,js]
--------------------------------
PUT my_index
{
"mappings": {
"my_type": {
"properties": {
"title": { <1>
"type": "string"
},
"content": { <1>
"type": "string"
},
"date": { <2>
"type": "date",
"include_in_all": false
}
}
}
}
}
--------------------------------
// AUTOSENSE
<1> The `title` and `content` fields will be included in the `_all` field.
<2> The `date` field will not be included in the `_all` field.
The `include_in_all` parameter can also be set at the type level and on
<<mapping-object-type,`object`>> or <<mapping-nested-type,`nested`>> fields,
in which case all sub-fields inherit that setting. For instance:
[source,js]
--------------------------------
PUT my_index
{
"mappings": {
"my_type": {
"include_in_all": false, <1>
"properties": {
"title": { "type": "string" },
"author": {
"include_in_all": true, <2>
"properties": {
"first_name": { "type": "string" },
"last_name": { "type": "string" }
}
},
"editor": {
"properties": {
"first_name": { "type": "string" }, <3>
"last_name": { "type": "string", "include_in_all": true } <3>
}
}
}
}
}
}
--------------------------------
// AUTOSENSE
<1> All fields in `my_type` are excluded from `_all`.
<2> The `author.first_name` and `author.last_name` fields are included in `_all`.
<3> Only the `editor.last_name` field is included in `_all`.
The `editor.first_name` inherits the type-level setting and is excluded.
[[all-field-and-boosting]]
==== Index boosting and the `_all` field
Individual fields can be _boosted_ at index time, with the `boost` parameter.
The `_all` field takes these boosts into account:
Individual fields can be _boosted_ at index time, with the <<index-boost,`boost`>>
parameter. The `_all` field takes these boosts into account:
[source,js]
--------------------------------

View File

@ -2,8 +2,8 @@
=== `_id` field
Each document indexed is associated with a <<mapping-type-field,`_type`>> (see
<<all-mapping-types,Mapping Types>>) and an <<mapping-id-field,`_id`>>. The
`_id` field is not indexed as its value can be derived automatically from the
<<mapping-type>>) and an <<mapping-id-field,`_id`>>. The `_id` field is not
indexed as its value can be derived automatically from the
<<mapping-uid-field,`_uid`>> field.
The value of the `_id` field is accessible in queries and scripts, but _not_

View File

@ -0,0 +1,30 @@
[[mapping-meta-field]]
=== `_meta` field
Each mapping type can have custom meta data associated with it. These are not
used at all by Elasticsearch, but can be used to store application-specific
metadata, such as the class that a document belongs to:
[source,js]
--------------------------------------------------
PUT my_index
{
"mappings": {
"user": {
"_meta": { <1>
"class": "MyApp::User",
"version": {
"min": "1.0",
"max": "1.3"
}
}
}
}
}
--------------------------------------------------
// AUTOSENSE
<1> This `_meta` info can be retrieved with the
<<indices-get-mapping,GET mapping>> API.
The `_meta` field can be updated on an existing type using the
<<indices-put-mapping,PUT mapping>> API.
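For instance, a sketch of such an update, reusing the `my_index` index and
`user` type from the example above (the new `class` and `version` values here
are arbitrary):
[source,js]
--------------------------------------------------
PUT my_index/_mapping/user
{
  "_meta": { <1>
    "class": "MyApp2::User3",
    "version": {
      "min": "1.3",
      "max": "1.5"
    }
  }
}
--------------------------------------------------
// AUTOSENSE
<1> The updated `_meta` is visible via the <<indices-get-mapping,GET mapping>> API.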

View File

@ -78,8 +78,7 @@ stored.
WARNING: Removing fields from the `_source` has similar downsides to disabling
`_source`, especially the fact that you cannot reindex documents from one
Elasticsearch index to another. Consider using
<<search-request-source-filtering,source filtering>> or a
<<mapping-transform,transform script>> instead.
<<search-request-source-filtering,source filtering>> instead.
The `includes`/`excludes` parameters (which also accept wildcards) can be used
as follows:

View File

@ -1,5 +1,5 @@
[[mapping-ttl-field]]
=== `_ttl`
=== `_ttl` field
Some types of documents, such as session data or special offers, come with an
expiration date. The `_ttl` field allows you to specify the minimum time a

View File

@ -2,8 +2,8 @@
=== `_type` field
Each document indexed is associated with a <<mapping-type-field,`_type`>> (see
<<all-mapping-types,Mapping Types>>) and an <<mapping-id-field,`_id`>>. The
`_type` field is indexed in order to make searching by type name fast.
<<mapping-type>>) and an <<mapping-id-field,`_id`>>. The `_type` field is
indexed in order to make searching by type name fast.
The value of the `_type` field is accessible in queries, aggregations,
scripts, and when sorting:

View File

@ -2,8 +2,8 @@
=== `_uid` field
Each document indexed is associated with a <<mapping-type-field,`_type`>> (see
<<all-mapping-types,Mapping Types>>) and an <<mapping-id-field,`_id`>>. These
values are combined as `{type}#{id}` and indexed as the `_uid` field.
<<mapping-type>>) and an <<mapping-id-field,`_id`>>. These values are
combined as `{type}#{id}` and indexed as the `_uid` field.
The value of the `_uid` field is accessible in queries, aggregations, scripts,
and when sorting:

View File

@ -1,25 +0,0 @@
[[mapping-meta]]
== Meta
Each mapping can have custom meta data associated with it. These are
simple storage elements that are simply persisted along with the mapping
and can be retrieved when fetching the mapping definition. The meta is
defined under the `_meta` element, for example:
[source,js]
--------------------------------------------------
{
"tweet" : {
"_meta" : {
"attr1" : "value1",
"attr2" : {
"attr3" : "value3"
}
}
}
}
--------------------------------------------------
Meta can be handy for example for client libraries that perform
serialization and deserialization to store its meta model (for example,
the class the document maps to).

View File

@ -0,0 +1,100 @@
[[mapping-params]]
== Mapping parameters
The following pages provide detailed explanations of the various mapping
parameters that are used by <<mapping-types,field mappings>>:
The following mapping parameters are common to some or all field datatypes:
* <<analyzer,`analyzer`>>
* <<index-boost,`boost`>>
* <<coerce,`coerce`>>
* <<copy-to,`copy_to`>>
* <<doc-values,`doc_values`>>
* <<dynamic,`dynamic`>>
* <<enabled,`enabled`>>
* <<fielddata,`fielddata`>>
* <<geohash,`geohash`>>
* <<geohash-precision,`geohash_precision`>>
* <<geohash-prefix,`geohash_prefix`>>
* <<mapping-date-format,`format`>>
* <<ignore-above,`ignore_above`>>
* <<ignore-malformed,`ignore_malformed`>>
* <<include-in-all,`include_in_all`>>
* <<index-options,`index_options`>>
* <<lat-lon,`lat_lon`>>
* <<mapping-index,`index`>>
* <<multi-fields,`fields`>>
* <<norms,`norms`>>
* <<null-value,`null_value`>>
* <<position-offset-gap,`position_offset_gap`>>
* <<properties,`properties`>>
* <<search-analyzer,`search_analyzer`>>
* <<similarity,`similarity`>>
* <<mapping-store,`store`>>
* <<term-vector,`term_vector`>>
include::params/analyzer.asciidoc[]
include::params/boost.asciidoc[]
include::params/coerce.asciidoc[]
include::params/copy-to.asciidoc[]
include::params/doc-values.asciidoc[]
include::params/dynamic.asciidoc[]
include::params/enabled.asciidoc[]
include::params/fielddata.asciidoc[]
include::params/format.asciidoc[]
include::params/geohash.asciidoc[]
include::params/geohash-precision.asciidoc[]
include::params/geohash-prefix.asciidoc[]
include::params/ignore-above.asciidoc[]
include::params/ignore-malformed.asciidoc[]
include::params/include-in-all.asciidoc[]
include::params/index.asciidoc[]
include::params/index-options.asciidoc[]
include::params/lat-lon.asciidoc[]
include::params/multi-fields.asciidoc[]
include::params/norms.asciidoc[]
include::params/null-value.asciidoc[]
include::params/position-offset-gap.asciidoc[]
include::params/precision-step.asciidoc[]
include::params/properties.asciidoc[]
include::params/search-analyzer.asciidoc[]
include::params/similarity.asciidoc[]
include::params/store.asciidoc[]
include::params/term-vector.asciidoc[]
[source,js]
--------------------------------------------------
--------------------------------------------------
// AUTOSENSE

View File

@ -0,0 +1,80 @@
[[analyzer]]
=== `analyzer`
The values of <<mapping-index,`analyzed`>> string fields are passed through an
<<analysis,analyzer>> to convert the string into a stream of _tokens_ or
_terms_. For instance, the string `"The quick Brown Foxes."` may, depending
on which analyzer is used, be analyzed to the tokens: `quick`, `brown`,
`fox`. These are the actual terms that are indexed for the field, which makes
it possible to search efficiently for individual words _within_ big blobs of
text.
This analysis process needs to happen not just at index time, but also at
query time: the query string needs to be passed through the same (or a
similar) analyzer so that the terms that it tries to find are in the same
format as those that exist in the index.
Elasticsearch ships with a number of <<analysis-analyzers,pre-defined analyzers>>,
which can be used without further configuration. It also ships with many
<<analysis-charfilters,character filters>>, <<analysis-tokenizers,tokenizers>>,
and <<analysis-tokenfilters>> which can be combined to configure
custom analyzers per index.
Analyzers can be specified per-query, per-field or per-index. At index time,
Elasticsearch will look for an analyzer in this order:
* The `analyzer` defined in the field mapping.
* An analyzer named `default` in the index settings.
* The <<analysis-standard-analyzer,`standard`>> analyzer.
At query time, there are a few more layers:
* The `analyzer` defined in a <<full-text-queries,full-text query>>.
* The `search_analyzer` defined in the field mapping.
* The `analyzer` defined in the field mapping.
* An analyzer named `default_search` in the index settings.
* An analyzer named `default` in the index settings.
* The <<analysis-standard-analyzer,`standard`>> analyzer.
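For instance, the index-level `default` and `default_search` analyzers mentioned
in the lists above could be configured in the index settings like this (a sketch;
the choice of the `simple` and `whitespace` analyzers is arbitrary):
[source,js]
--------------------------------------------------
PUT my_index
{
  "settings": {
    "analysis": {
      "analyzer": {
        "default": { <1>
          "type": "simple"
        },
        "default_search": { <2>
          "type": "whitespace"
        }
      }
    }
  }
}
--------------------------------------------------
// AUTOSENSE
<1> The `default` analyzer is used at index time when no analyzer is specified in the field mapping.
<2> The `default_search` analyzer is used at query time when no analyzer is specified in the query or the field mapping.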
The easiest way to specify an analyzer for a particular field is to define it
in the field mapping, as follows:
[source,js]
--------------------------------------------------
PUT my_index
{
"mappings": {
"my_type": {
"properties": {
"text": { <1>
"type": "string",
"fields": {
"english": { <2>
"type": "string",
"analyzer": "english"
}
}
}
}
}
}
}
GET my_index/_analyze?field=text <3>
{
"text": "The quick Brown Foxes."
}
GET my_index/_analyze?field=text.english <4>
{
"text": "The quick Brown Foxes."
}
--------------------------------------------------
// AUTOSENSE
<1> The `text` field uses the default `standard` analyzer.
<2> The `text.english` <<multi-fields,multi-field>> uses the `english` analyzer, which removes stop words and applies stemming.
<3> This returns the tokens: [ `the`, `quick`, `brown`, `foxes` ].
<4> This returns the tokens: [ `quick`, `brown`, `fox` ].

View File

@ -0,0 +1,59 @@
[[index-boost]]
=== `boost`
Individual fields can be _boosted_ -- count more towards the relevance score
-- at index time, with the `boost` parameter as follows:
[source,js]
--------------------------------------------------
PUT my_index
{
"mappings": {
"my_type": {
"properties": {
"title": {
"type": "string",
"boost": 2 <1>
},
"content": {
"type": "string"
}
}
}
}
}
--------------------------------------------------
// AUTOSENSE
<1> Matches on the `title` field will have twice the weight of those on the
`content` field, which has the default `boost` of `1.0`.
Note that a `title` field will usually be shorter than a `content` field. The
default relevance calculation takes field length into account, so a short
`title` field will have a higher natural boost than a long `content` field.
[WARNING]
.Why index time boosting is a bad idea
==================================================
We advise against using index time boosting for the following reasons:
* You cannot change index-time `boost` values without reindexing all of your
documents.
* Every query supports query-time boosting which achieves the same effect. The
difference is that you can tweak the `boost` value without having to reindex.
* Index-time boosts are stored as part of the <<norms,`norm`>>, which is only one
byte. This reduces the resolution of the field length normalization factor
which can lead to lower quality relevance calculations.
==================================================
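For comparison, a query-time boost on the same `title` field might look like
this (a sketch; the value `2` mirrors the index-time example above):
[source,js]
--------------------------------------------------
GET my_index/_search
{
  "query": {
    "match": {
      "title": {
        "query": "quick brown fox",
        "boost": 2 <1>
      }
    }
  }
}
--------------------------------------------------
// AUTOSENSE
<1> The boost is applied at query time and can be changed without reindexing.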
The only advantage that index time boosting has is that it is copied with the
value into the <<mapping-all-field,`_all`>> field. This means that, when
querying the `_all` field, words that originated from the `title` field will
have a higher score than words that originated in the `content` field.
This functionality comes at a cost: queries on the `_all` field are slower
when index-time boosting is used.

View File

@ -0,0 +1,89 @@
[[coerce]]
=== `coerce`
Data is not always clean. Depending on how it is produced, a number might be
rendered in the JSON body as a true JSON number, e.g. `5`, but it might also
be rendered as a string, e.g. `"5"`. Alternatively, a number that should be
an integer might instead be rendered as a floating point, e.g. `5.0`, or even
`"5.0"`.
Coercion attempts to clean up dirty values to fit the datatype of a field.
For instance:
* Strings will be coerced to numbers.
* Floating-point numbers will be truncated when indexed into integer fields.
* Lon/lat geo-points will be normalized to a standard -180:180 / -90:90 coordinate system.
For instance:
[source,js]
--------------------------------------------------
PUT my_index
{
"mappings": {
"my_type": {
"properties": {
"number_one": {
"type": "integer"
},
"number_two": {
"type": "integer",
"coerce": false
}
}
}
}
}
PUT my_index/my_type/1
{
"number_one": "10" <1>
}
PUT my_index/my_type/2
{
"number_two": "10" <2>
}
--------------------------------------------------
// AUTOSENSE
<1> The `number_one` field will contain the integer `10`.
<2> This document will be rejected because coercion is disabled.
[[coerce-setting]]
==== Index-level default
The `index.mapping.coerce` setting can be set on the index level to disable
coercion globally across all mapping types:
[source,js]
--------------------------------------------------
PUT my_index
{
"settings": {
"index.mapping.coerce": false
},
"mappings": {
"my_type": {
"properties": {
"number_one": {
"type": "integer"
},
"number_two": {
"type": "integer",
"coerce": true
}
}
}
}
}
PUT my_index/my_type/1
{ "number_one": "10" } <1>
PUT my_index/my_type/2
{ "number_two": "10" } <2>
--------------------------------------------------
// AUTOSENSE
<1> This document will be rejected because the `number_one` field inherits the index-level coercion setting.
<2> The `number_two` field overrides the index level setting to enable coercion.

View File

@ -0,0 +1,64 @@
[[copy-to]]
=== `copy_to`
The `copy_to` parameter allows you to create custom
<<mapping-all-field,`_all`>> fields. In other words, the values of multiple
fields can be copied into a group field, which can then be queried as a single
field. For instance, the `first_name` and `last_name` fields can be copied to
the `full_name` field as follows:
[source,js]
--------------------------------------------------
PUT /my_index
{
"mappings": {
"my_type": {
"properties": {
"first_name": {
"type": "string",
"copy_to": "full_name" <1>
},
"last_name": {
"type": "string",
"copy_to": "full_name" <1>
},
"full_name": {
"type": "string"
}
}
}
}
}
PUT /my_index/my_type/1
{
"first_name": "John",
"last_name": "Smith"
}
GET /my_index/_search
{
"query": {
"match": {
"full_name": { <2>
"query": "John Smith",
"operator": "and"
}
}
}
}
--------------------------------------------------
// AUTOSENSE
<1> The values of the `first_name` and `last_name` fields are copied to the
`full_name` field.
<2> The `first_name` and `last_name` fields can still be queried for the
first name and last name respectively, but the `full_name` field can be
queried for both first and last names.
Some important points:
* It is the field _value_ which is copied, not the terms (which result from the analysis process).
* The original <<mapping-source-field,`_source`>> field will not be modified to show the copied values.
* The same value can be copied to multiple fields, with `"copy_to": [ "field_1", "field_2" ]`

View File

@ -0,0 +1,46 @@
[[doc-values]]
=== `doc_values`
Most fields are <<mapping-index,indexed>> by default, which makes them
searchable. The inverted index allows queries to look up the search term in
a unique sorted list of terms, and from that immediately have access to the list
of documents that contain the term.
Sorting, aggregations, and access to field values in scripts require a
different data access pattern. Instead of looking up the term and finding
documents, we need to be able to look up the document and find the terms that
it has in a field.
Doc values are the on-disk data structure, built at document index time, which
makes this data access pattern possible. Doc values are supported on almost
all field types, with the __notable exception of `analyzed` string fields__.
All fields which support doc values have them enabled by default. If you are
sure that you don't need to sort or aggregate on a field, or access the field
value from a script, you can disable doc values in order to save disk space:
[source,js]
--------------------------------------------------
PUT my_index
{
"mappings": {
"my_type": {
"properties": {
"status_code": { <1>
"type": "string",
"index": "not_analyzed"
},
"session_id": { <2>
"type": "string",
"index": "not_analyzed",
"doc_values": false
}
}
}
}
}
--------------------------------------------------
// AUTOSENSE
<1> The `status_code` field has `doc_values` enabled by default.
<2> The `session_id` has `doc_values` disabled, but can still be queried.

View File

@ -0,0 +1,87 @@
[[dynamic]]
=== `dynamic`
By default, fields can be added _dynamically_ to a document, or to
<<object,inner objects>> within a document, just by indexing a document
containing the new field. For instance:
[source,js]
--------------------------------------------------
DELETE my_index <1>
PUT my_index/my_type/1 <2>
{
"username": "johnsmith",
"name": {
"first": "John",
"last": "Smith"
}
}
GET my_index/_mapping <3>
PUT my_index/my_type/2 <4>
{
"username": "marywhite",
"email": "mary@white.com",
"name": {
"first": "Mary",
"middle": "Alice",
"last": "White"
}
}
GET my_index/_mapping <5>
--------------------------------------------------
// AUTOSENSE
<1> First delete the index, in case it already exists.
<2> This document introduces the string field `username`, the object field
`name`, and two string fields under the `name` object which can be
referred to as `name.first` and `name.last`.
<3> Check the mapping to verify the above.
<4> This document adds two string fields: `email` and `name.middle`.
<5> Check the mapping to verify the changes.
The details of how new fields are detected and added to the mapping is explained in <<dynamic-mapping>>.
The `dynamic` setting controls whether new fields can be added dynamically or
not. It accepts three settings:
[horizontal]
`true`:: Newly detected fields are added to the mapping. (default)
`false`:: Newly detected fields are ignored. New fields must be added explicitly.
`strict`:: If new fields are detected, an exception is thrown and the document is rejected.
The `dynamic` setting may be set at the mapping type level, and on each
<<object,inner object>>. Inner objects inherit the setting from their parent
object or from the mapping type. For instance:
[source,js]
--------------------------------------------------
PUT my_index
{
"mappings": {
"my_type": {
"dynamic": false, <1>
"properties": {
"user": { <2>
"properties": {
"name": {
"type": "string"
},
"social_networks": { <3>
"dynamic": true,
"properties": {}
}
}
}
}
}
}
}
--------------------------------------------------
// AUTOSENSE
<1> Dynamic mapping is disabled at the type level, so no new top-level fields will be added dynamically.
<2> The `user` object inherits the type-level setting.
<3> The `user.social_networks` object enables dynamic mapping, so new fields may be added to this inner object.

View File

@ -0,0 +1,94 @@
[[enabled]]
=== `enabled`
Elasticsearch tries to index all of the fields you give it, but sometimes you
want to just store the field without indexing it. For instance, imagine that
you are using Elasticsearch as a web session store. You may want to index the
session ID and last update time, but you don't need to query or run
aggregations on the session data itself.
The `enabled` setting, which can be applied only to the mapping type and to
<<object,`object`>> fields, causes Elasticsearch to skip parsing of the
contents of the field entirely. The JSON can still be retrieved from the
<<mapping-source-field,`_source`>> field, but it is not searchable or stored
in any other way:
[source,js]
--------------------------------------------------
PUT my_index
{
"mappings": {
"session": {
"properties": {
"user_id": {
"type": "string",
"index": "not_analyzed"
},
"last_updated": {
"type": "date"
},
"session_data": { <1>
"enabled": false
}
}
}
}
}
PUT my_index/session/session_1
{
"user_id": "kimchy",
"session_data": { <2>
"arbitrary_object": {
"some_array": [ "foo", "bar", { "baz": 2 } ]
}
},
"last_updated": "2015-12-06T18:20:22"
}
PUT my_index/session/session_2
{
"user_id": "jpountz",
"session_data": "none", <3>
"last_updated": "2015-12-06T18:22:13"
}
--------------------------------------------------
// AUTOSENSE
<1> The `session_data` field is disabled.
<2> Any arbitrary data can be passed to the `session_data` field as it will be entirely ignored.
<3> The `session_data` will also ignore values that are not JSON objects.
The entire mapping type may be disabled as well, in which case the document is
stored in the <<mapping-source-field,`_source`>> field, which means it can be
retrieved, but none of its contents are indexed in any way:
[source,js]
--------------------------------------------------
PUT my_index
{
"mappings": {
"session": { <1>
"enabled": false
}
}
}
PUT my_index/session/session_1
{
"user_id": "kimchy",
"session_data": {
"arbitrary_object": {
"some_array": [ "foo", "bar", { "baz": 2 } ]
}
},
"last_updated": "2015-12-06T18:20:22"
}
GET my_index/session/session_1 <2>
GET my_index/_mapping <3>
--------------------------------------------------
// AUTOSENSE
<1> The entire `session` mapping type is disabled.
<2> The document can be retrieved.
<3> Checking the mapping reveals that no fields have been added.

View File

@ -0,0 +1,225 @@
[[fielddata]]
=== `fielddata`
Most fields are <<mapping-index,indexed>> by default, which makes them
searchable. The inverted index allows queries to look up the search term in
a unique sorted list of terms, and from that immediately have access to the list
of documents that contain the term.
Sorting, aggregations, and access to field values in scripts require a
different data access pattern. Instead of looking up the term and finding
documents, we need to be able to look up the document and find the terms that
it has in a field.
Most fields can use index-time, on-disk <<doc-values,`doc_values`>> to support
this type of data access pattern, but `analyzed` string fields do not support
`doc_values`.
Instead, `analyzed` strings use a query-time data structure called
`fielddata`. This data structure is built on demand the first time that a
field is used for aggregations or sorting, or is accessed in a script. It is built
by reading the entire inverted index for each segment from disk, inverting the
term ↔︎ document relationship, and storing the result in memory, in the
JVM heap.
Loading fielddata is an expensive process so, once it has been loaded, it
remains in memory for the lifetime of the segment.
[WARNING]
.Fielddata can fill up your heap space
==============================================================================
Fielddata can consume a lot of heap space, especially when loading high
cardinality `analyzed` string fields. Most of the time, it doesn't make sense
to sort or aggregate on `analyzed` string fields (with the notable exception
of the
<<search-aggregations-bucket-significantterms-aggregation,`significant_terms`>>
aggregation). Always think about whether a `not_analyzed` field (which can
use `doc_values`) would be a better fit for your use case.
==============================================================================
[[fielddata-format]]
==== `fielddata.format`
For `analyzed` string fields, the fielddata `format` controls whether
fielddata should be enabled or not. It accepts: `disabled` and `paged_bytes`
(enabled, which is the default). To disable fielddata loading, you can use
the following mapping:
[source,js]
--------------------------------------------------
PUT my_index
{
"mappings": {
"my_type": {
"properties": {
"text": {
"type": "string",
"fielddata": {
"format": "disabled" <1>
}
}
}
}
}
}
--------------------------------------------------
// AUTOSENSE
<1> The `text` field cannot be used for sorting, aggregations, or in scripts.
.Fielddata and other datatypes
[NOTE]
==================================================
Historically, other field datatypes also used fielddata, but this has been replaced
by index-time, disk-based <<doc-values,`doc_values`>>.
==================================================
[[fielddata-loading]]
==== `fielddata.loading`
This per-field setting controls when fielddata is loaded into memory. It
accepts three options:
[horizontal]
`lazy`::
Fielddata is only loaded into memory when it is needed. (default)
`eager`::
Fielddata is loaded into memory before a new search segment becomes
visible to search. This can reduce the latency that a user may experience
if their search request has to trigger lazy loading from a big segment.
`eager_global_ordinals`::
Loading fielddata into memory is only part of the work that is required.
After loading the fielddata for each segment, Elasticsearch builds the
<<global-ordinals>> data structure to make a list of all unique terms
across all the segments in a shard. By default, global ordinals are built
lazily. If the field has a very high cardinality, global ordinals may
take some time to build, in which case you can use eager loading instead.
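As a sketch, eager loading of global ordinals could be configured on a
hypothetical `tag` field like this:
[source,js]
--------------------------------------------------
PUT my_index
{
  "mappings": {
    "my_type": {
      "properties": {
        "tag": {
          "type": "string",
          "fielddata": {
            "loading": "eager_global_ordinals" <1>
          }
        }
      }
    }
  }
}
--------------------------------------------------
// AUTOSENSE
<1> Global ordinals for the `tag` field are built before a new segment becomes visible to search.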
[[global-ordinals]]
.Global ordinals
*****************************************
Global ordinals is a data structure built on top of fielddata and doc values that
maintains an incremental numbering for each unique term, in lexicographic
order. Each term has a unique number, and the number of term 'A' is lower than
the number of term 'B' if 'A' sorts before 'B'. Global ordinals are only supported on string fields.
Fielddata and doc values also have ordinals, which is a unique numbering for all terms
in a particular segment and field. Global ordinals just build on top of this,
by providing a mapping between the segment ordinals and the global ordinals,
the latter being unique across the entire shard.
Global ordinals are used for features that use segment ordinals, such as
sorting and the terms aggregation, to improve the execution time. A terms
aggregation relies purely on global ordinals to perform the aggregation at the
shard level, then converts global ordinals to the real term only for the final
reduce phase, which combines results from different shards.
Global ordinals for a specified field are tied to _all the segments of a
shard_, while fielddata and doc values ordinals are tied to a single segment.
which is different than for field data for a specific field which is tied to a
single segment. For this reason global ordinals need to be entirely rebuilt
whenever a once new segment becomes visible.
The loading time of global ordinals depends on the number of terms in a field, but in general
it is low, since the source field data has already been loaded. The memory overhead of global
ordinals is small because it is very efficiently compressed. Eager loading of global ordinals
can move the loading time from the first search request to the refresh itself.
*****************************************
[[field-data-filtering]]
==== `fielddata.filter`
Fielddata filtering can be used to reduce the number of terms loaded into
memory, and thus reduce memory usage. Terms can be filtered by _frequency_ or
by _regular expression_, or a combination of the two:
Filtering by frequency::
+
--
The frequency filter allows you to only load terms whose term frequency falls
between a `min` and `max` value, which can be expressed as an absolute
number (when the number is bigger than 1.0) or as a percentage
(e.g. `0.01` is `1%` and `1.0` is `100%`). Frequency is calculated
*per segment*. Percentages are based on the number of docs which have a
value for the field, as opposed to all docs in the segment.
Small segments can be excluded completely by specifying the minimum
number of docs that the segment should contain with `min_segment_size`:
[source,js]
--------------------------------------------------
PUT my_index
{
"mappings": {
"my_type": {
"properties": {
"tag": {
"type": "string",
"fielddata": {
"filter": {
"frequency": {
"min": 0.001,
"max": 0.1,
"min_segment_size": 500
}
}
}
}
}
}
}
}
--------------------------------------------------
// AUTOSENSE
--
Filtering by regex::
+
--
Terms can also be filtered by regular expression - only values which
match the regular expression are loaded. Note: the regular expression is
applied to each term in the field, not to the whole field value. For
instance, to only load hashtags from a tweet, we can use a regular
expression which matches terms beginning with `#`:
[source,js]
--------------------------------------------------
PUT my_index
{
"mappings": {
"my_type": {
"properties": {
"tweet": {
"type": "string",
"analyzer": "whitespace",
"fielddata": {
"filter": {
"regex": {
"pattern": "^#.*"
}
}
}
}
}
}
}
}
--------------------------------------------------
// AUTOSENSE
--
These filters can be updated on an existing field mapping and will take
effect the next time the fielddata for a segment is loaded. Use the
<<indices-clearcache,Clear Cache>> API
to reload the fielddata using the new filters.
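As a sketch, the `tweet` field from the previous example could be updated with a
frequency filter, followed by a call to the Clear Cache API (the filter values
are arbitrary):
[source,js]
--------------------------------------------------
PUT my_index/_mapping/my_type
{
  "properties": {
    "tweet": {
      "type": "string",
      "analyzer": "whitespace",
      "fielddata": {
        "filter": {
          "frequency": {
            "min": 0.001,
            "max": 0.1
          }
        }
      }
    }
  }
}
POST my_index/_cache/clear <1>
--------------------------------------------------
// AUTOSENSE
<1> Clearing the caches drops the existing fielddata, so the new filters are applied the next time fielddata is loaded.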

View File

@ -0,0 +1,281 @@
[[mapping-date-format]]
=== `format`
In JSON documents, dates are represented as strings. Elasticsearch uses a set
of preconfigured formats to recognize and parse these strings into a long
value representing _milliseconds-since-the-epoch_ in UTC.
Besides the <<built-in-date-formats,built-in formats>>, your own
<<custom-date-formats,custom formats>> can be specified using the familiar
`yyyy/MM/dd` syntax:
[source,js]
--------------------------------------------------
PUT my_index
{
"mappings": {
"my_type": {
"properties": {
"date": {
"type": "date",
"format": "yyyy-MM-dd"
}
}
}
}
}
--------------------------------------------------
// AUTOSENSE
Many APIs which support date values also support <<date-math,date math>>
expressions, such as `now-1M/d` -- the current time, minus one month, rounded
down to the nearest day.
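For instance, a range query using date math against the `date` field defined
above might look like this (a sketch):
[source,js]
--------------------------------------------------
GET my_index/_search
{
  "query": {
    "range": {
      "date": {
        "gte": "now-1M/d", <1>
        "lt": "now/d"
      }
    }
  }
}
--------------------------------------------------
// AUTOSENSE
<1> Matches documents whose `date` falls between one month ago and the start of today, with both bounds rounded down to the nearest day.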
[[custom-date-formats]]
==== Custom date formats
Completely customizable date formats are supported. The syntax for these is explained
http://www.joda.org/joda-time/apidocs/org/joda/time/format/DateTimeFormat.html[in the Joda docs].
[[built-in-date-formats]]
==== Built In Formats
Most of the formats below have a `strict` companion format, which means that the
year, month, and day parts must have leading zeros in order
to be valid. This means that a date like `5/11/1` would not be valid;
you would need to specify the full date, which would be `2005/11/01` in this
example. So instead of `date_optional_time` you would use
`strict_date_optional_time`.
The following table lists all the default ISO formats supported:
`epoch_millis`::
A formatter for the number of milliseconds since the epoch. Note that
this timestamp is limited to 13 characters, so dates before the year 1653
and after 2286 are not supported. You should use a different date formatter in
that case.
`epoch_second`::
A formatter for the number of seconds since the epoch. Note that this
timestamp is limited to 10 characters, so dates before the year 1653 and
after 2286 are not supported. You should use a different date formatter in that
case.
[[strict-date-time]]`date_optional_time` or `strict_date_optional_time`::
A generic ISO datetime parser where the date is mandatory and the time is
optional.
http://www.joda.org/joda-time/apidocs/org/joda/time/format/ISODateTimeFormat.html#dateOptionalTimeParser--[Full details here].
`basic_date`::
A basic formatter for a full date as four digit year, two digit month of
year, and two digit day of month: `yyyyMMdd`.
`basic_date_time`::
A basic formatter that combines a basic date and time, separated by a 'T':
`yyyyMMdd'T'HHmmss.SSSZ`.
`basic_date_time_no_millis`::
A basic formatter that combines a basic date and time without millis,
separated by a 'T': `yyyyMMdd'T'HHmmssZ`.
`basic_ordinal_date`::
A formatter for a full ordinal date, using a four digit year and three
digit dayOfYear: `yyyyDDD`.
`basic_ordinal_date_time`::
A formatter for a full ordinal date and time, using a four digit year and
three digit dayOfYear: `yyyyDDD'T'HHmmss.SSSZ`.
`basic_ordinal_date_time_no_millis`::
A formatter for a full ordinal date and time without millis, using a four
digit year and three digit dayOfYear: `yyyyDDD'T'HHmmssZ`.
`basic_time`::
A basic formatter for a two digit hour of day, two digit minute of hour,
two digit second of minute, three digit millis, and time zone offset:
`HHmmss.SSSZ`.
`basic_time_no_millis`::
A basic formatter for a two digit hour of day, two digit minute of hour,
two digit second of minute, and time zone offset: `HHmmssZ`.
`basic_t_time`::
A basic formatter for a two digit hour of day, two digit minute of hour,
two digit second of minute, three digit millis, and time zone off set
prefixed by 'T': `'T'HHmmss.SSSZ`.
`basic_t_time_no_millis`::
A basic formatter for a two digit hour of day, two digit minute of hour,
two digit second of minute, and time zone offset prefixed by 'T':
`'T'HHmmssZ`.
`basic_week_date` or `strict_basic_week_date`::
A basic formatter for a full date as four digit weekyear, two digit week
of weekyear, and one digit day of week: `xxxx'W'wwe`.
`basic_week_date_time` or `strict_basic_week_date_time`::
A basic formatter that combines a basic weekyear date and time, separated
by a 'T': `xxxx'W'wwe'T'HHmmss.SSSZ`.
`basic_week_date_time_no_millis` or `strict_basic_week_date_time_no_millis`::
A basic formatter that combines a basic weekyear date and time without
millis, separated by a 'T': `xxxx'W'wwe'T'HHmmssZ`.
`date` or `strict_date`::
A formatter for a full date as four digit year, two digit month of year,
and two digit day of month: `yyyy-MM-dd`.
`date_hour` or `strict_date_hour`::
A formatter that combines a full date and two digit hour of day.
`date_hour_minute` or `strict_date_hour_minute`::
A formatter that combines a full date, two digit hour of day, and two
digit minute of hour.
`date_hour_minute_second` or `strict_date_hour_minute_second`::
A formatter that combines a full date, two digit hour of day, two digit
minute of hour, and two digit second of minute.
`date_hour_minute_second_fraction` or `strict_date_hour_minute_second_fraction`::
A formatter that combines a full date, two digit hour of day, two digit
minute of hour, two digit second of minute, and three digit fraction of
second: `yyyy-MM-dd'T'HH:mm:ss.SSS`.
`date_hour_minute_second_millis` or `strict_date_hour_minute_second_millis`::
A formatter that combines a full date, two digit hour of day, two digit
minute of hour, two digit second of minute, and three digit fraction of
second: `yyyy-MM-dd'T'HH:mm:ss.SSS`.
`date_time` or `strict_date_time`::
A formatter that combines a full date and time, separated by a 'T':
`yyyy-MM-dd'T'HH:mm:ss.SSSZZ`.
`date_time_no_millis` or `strict_date_time_no_millis`::
A formatter that combines a full date and time without millis, separated
by a 'T': `yyyy-MM-dd'T'HH:mm:ssZZ`.
`hour` or `strict_hour`::
A formatter for a two digit hour of day.
`hour_minute` or `strict_hour_minute`::
A formatter for a two digit hour of day and two digit minute of hour.
`hour_minute_second` or `strict_hour_minute_second`::
A formatter for a two digit hour of day, two digit minute of hour, and two
digit second of minute.
`hour_minute_second_fraction` or `strict_hour_minute_second_fraction`::
A formatter for a two digit hour of day, two digit minute of hour, two
digit second of minute, and three digit fraction of second: `HH:mm:ss.SSS`.
`hour_minute_second_millis` or `strict_hour_minute_second_millis`::
A formatter for a two digit hour of day, two digit minute of hour, two
digit second of minute, and three digit fraction of second: `HH:mm:ss.SSS`.
`ordinal_date` or `strict_ordinal_date`::
A formatter for a full ordinal date, using a four digit year and three
digit dayOfYear: `yyyy-DDD`.
`ordinal_date_time` or `strict_ordinal_date_time`::
A formatter for a full ordinal date and time, using a four digit year and
three digit dayOfYear: `yyyy-DDD'T'HH:mm:ss.SSSZZ`.
`ordinal_date_time_no_millis` or `strict_ordinal_date_time_no_millis`::
A formatter for a full ordinal date and time without millis, using a four
digit year and three digit dayOfYear: `yyyy-DDD'T'HH:mm:ssZZ`.
`time` or `strict_time`::
A formatter for a two digit hour of day, two digit minute of hour, two
digit second of minute, three digit fraction of second, and time zone
offset: `HH:mm:ss.SSSZZ`.
`time_no_millis` or `strict_time_no_millis`::
A formatter for a two digit hour of day, two digit minute of hour, two
digit second of minute, and time zone offset: `HH:mm:ssZZ`.
`t_time` or `strict_t_time`::
A formatter for a two digit hour of day, two digit minute of hour, two
digit second of minute, three digit fraction of second, and time zone
offset prefixed by 'T': `'T'HH:mm:ss.SSSZZ`.
`t_time_no_millis` or `strict_t_time_no_millis`::
A formatter for a two digit hour of day, two digit minute of hour, two
digit second of minute, and time zone offset prefixed by 'T': `'T'HH:mm:ssZZ`.
`week_date` or `strict_week_date`::
A formatter for a full date as four digit weekyear, two digit week of
weekyear, and one digit day of week: `xxxx-'W'ww-e`.
`week_date_time` or `strict_week_date_time`::
A formatter that combines a full weekyear date and time, separated by a
'T': `xxxx-'W'ww-e'T'HH:mm:ss.SSSZZ`.
`week_date_time_no_millis` or `strict_week_date_time_no_millis`::
A formatter that combines a full weekyear date and time without millis,
separated by a 'T': `xxxx-'W'ww-e'T'HH:mm:ssZZ`.
`weekyear` or `strict_weekyear`::
A formatter for a four digit weekyear.
`weekyear_week` or `strict_weekyear_week`::
A formatter for a four digit weekyear and two digit week of weekyear.
`weekyear_week_day` or `strict_weekyear_week_day`::
A formatter for a four digit weekyear, two digit week of weekyear, and one
digit day of week.
`year` or `strict_year`::
A formatter for a four digit year.
`year_month` or `strict_year_month`::
A formatter for a four digit year and two digit month of year.
`year_month_day` or `strict_year_month_day`::
A formatter for a four digit year, two digit month of year, and two digit
day of month.

View File

@ -0,0 +1,60 @@
[[geohash-precision]]
=== `geohash_precision`
Geohashes are a form of lat/lon encoding which divides the earth up into
a grid. Each cell in this grid is represented by a geohash string. Each
cell in turn can be further subdivided into smaller cells which are
represented by a longer string. So the longer the geohash, the smaller
(and thus more accurate) the cell is.
The `geohash_precision` setting controls the length of the geohash that is
indexed when the <<geohash,`geohash`>> option is enabled, and the maximum
geohash length when the <<geohash-prefix,`geohash_prefix`>> option is enabled.
It accepts:
* a number between 1 and 12 (default), which represents the length of the geohash.
* a <<distance-units,distance>>, e.g. `1km`.
If a distance is specified, it will be translated to the smallest
geohash-length that will provide the requested resolution.
For example:
[source,js]
--------------------------------------------------
PUT my_index
{
"mappings": {
"my_type": {
"properties": {
"location": {
"type": "geo_point",
"geohash_prefix": true,
"geohash_precision": 6 <1>
}
}
}
}
}
PUT my_index/my_type/1
{
"location": {
"lat": 41.12,
"lon": -71.34
}
}
GET my_index/_search?fielddata_fields=location.geohash
{
"query": {
"term": {
"location.geohash": "drm3bt"
}
}
}
--------------------------------------------------
// AUTOSENSE
<1> A `geohash_precision` of 6 equates to geohash cells of approximately 1.26km x 0.6km

View File

@ -0,0 +1,64 @@
[[geohash-prefix]]
=== `geohash_prefix`
Geohashes are a form of lat/lon encoding which divides the earth up into
a grid. Each cell in this grid is represented by a geohash string. Each
cell in turn can be further subdivided into smaller cells which are
represented by a longer string. So the longer the geohash, the smaller
(and thus more accurate) the cell is.
While the <<geohash,`geohash`>> option enables indexing the geohash that
corresponds to the lat/lon point, at the specified
<<geohash-precision,precision>>, the `geohash_prefix` option will also
index all the enclosing cells as well.
For instance, a geohash of `drm3btev3e86` will index all of the following
terms: [ `d`, `dr`, `drm`, `drm3`, `drm3b`, `drm3bt`, `drm3bte`, `drm3btev`,
`drm3btev3`, `drm3btev3e`, `drm3btev3e8`, `drm3btev3e86` ].
The geohash prefixes can be used with the
<<query-dsl-geohash-cell-query,`geohash_cell` query>> to find points within a
particular geohash, or its neighbours:
[source,js]
--------------------------------------------------
PUT my_index
{
"mappings": {
"my_type": {
"properties": {
"location": {
"type": "geo_point",
"geohash_prefix": true,
"geohash_precision": 6
}
}
}
}
}
PUT my_index/my_type/1
{
"location": {
"lat": 41.12,
"lon": -71.34
}
}
GET my_index/_search?fielddata_fields=location.geohash
{
"query": {
"geohash_cell": {
"location": {
"lat": 41.02,
"lon": -71.48
},
"precision": 4, <1>
"neighbors": true <1>
}
}
}
--------------------------------------------------
// AUTOSENSE

View File

@ -0,0 +1,70 @@
[[geohash]]
=== `geohash`
Geohashes are a form of lat/lon encoding which divides the earth up into
a grid. Each cell in this grid is represented by a geohash string. Each
cell in turn can be further subdivided into smaller cells which are
represented by a longer string. So the longer the geohash, the smaller
(and thus more accurate) the cell is.
Because geohashes are just strings, they can be stored in an inverted
index like any other string, which makes querying them very efficient.
If you enable the `geohash` option, a `geohash` ``sub-field'' will be indexed,
e.g. as `.geohash`. The length of the geohash is controlled by the
<<geohash-precision,`geohash_precision`>> parameter.
If the <<geohash-prefix,`geohash_prefix`>> option is enabled, the `geohash`
option will be enabled automatically.
For example:
[source,js]
--------------------------------------------------
PUT my_index
{
"mappings": {
"my_type": {
"properties": {
"location": {
"type": "geo_point", <1>
"geohash": true
}
}
}
}
}
PUT my_index/my_type/1
{
"location": {
"lat": 41.12,
"lon": -71.34
}
}
GET my_index/_search?fielddata_fields=location.geohash <2>
{
"query": {
"prefix": {
"location.geohash": "drm3b" <3>
}
}
}
--------------------------------------------------
// AUTOSENSE
<1> A `location.geohash` field will be indexed for each geo-point.
<2> The geohash can be retrieved with <<doc-values,`doc_values`>>.
<3> A <<query-dsl-prefix-query,`prefix`>> query can find all geohashes which start with a particular prefix.
[WARNING]
============================================
A `prefix` query on geohashes is expensive. Instead, consider using the
<<geohash-prefix,`geohash_prefix`>> option to pay the expense once at index time
instead of on every query.
============================================

View File

@ -0,0 +1,61 @@
[[ignore-above]]
=== `ignore_above`
Strings longer than the `ignore_above` setting will not be processed by the
<<analyzer,analyzer>> and will not be indexed. This is mainly useful for
<<mapping-index,`not_analyzed`>> string fields, which are typically used for
filtering, aggregations, and sorting. These are structured fields and it
doesn't usually make sense to allow very long terms to be indexed in these
fields.
[source,js]
--------------------------------------------------
PUT my_index
{
"mappings": {
"my_type": {
"properties": {
"message": {
"type": "string",
"index": "not_analyzed",
"ignore_above": 20 <1>
}
}
}
}
}
PUT my_index/my_type/1 <2>
{
"message": "Syntax error"
}
PUT my_index/my_type/2 <3>
{
"message": "Syntax error with some long stacktrace"
}
GET _search <4>
{
"aggs": {
"messages": {
"terms": {
"field": "message"
}
}
}
}
--------------------------------------------------
// AUTOSENSE
<1> This field will ignore any string longer than 20 characters.
<2> This document is indexed successfully.
<3> This document will be indexed, but without indexing the `message` field.
<4> Search returns both documents, but only the first is present in the terms aggregation.
This option is also useful for protecting against Lucene's term byte-length
limit of `32766`.
NOTE: The value for `ignore_above` is the _character count_, but Lucene counts
bytes. If you use UTF-8 text with many non-ASCII characters, you may want to
set the limit to `32766 / 3 = 10922` since UTF-8 characters may occupy at most
3 bytes.

View File

@ -0,0 +1,83 @@
[[ignore-malformed]]
=== `ignore_malformed`
Sometimes you don't have much control over the data that you receive. One
user may send a `login` field that is a <<date,`date`>>, and another sends a
`login` field that is an email address.
Trying to index the wrong datatype into a field throws an exception by
default, and rejects the whole document. The `ignore_malformed` parameter, if
set to `true`, allows the exception to be ignored. The malformed field is not
indexed, but other fields in the document are processed normally.
For example:
[source,js]
--------------------------------------------------
PUT my_index
{
"mappings": {
"my_type": {
"properties": {
"number_one": {
"type": "integer"
},
"number_two": {
"type": "integer",
"ignore_malformed": true
}
}
}
}
}
PUT my_index/my_type/1
{
"text": "Some text value",
"number_one": "foo" <1>
}
PUT my_index/my_type/2
{
"text": "Some text value",
"number_two": "foo" <2>
}
--------------------------------------------------
// AUTOSENSE
<1> This document will be rejected because `number_one` does not allow malformed values.
<2> This document will have the `text` field indexed, but not the `number_two` field.
[[ignore-malformed-setting]]
==== Index-level default
The `index.mapping.ignore_malformed` setting can be set on the index level to
allow to ignore malformed content globally across all mapping types.
[source,js]
--------------------------------------------------
PUT my_index
{
"settings": {
"index.mapping.ignore_malformed": true <1>
},
"mappings": {
"my_type": {
"properties": {
"number_one": { <1>
"type": "byte"
},
"number_two": {
"type": "integer",
"ignore_malformed": false <2>
}
}
}
}
}
--------------------------------------------------
// AUTOSENSE
<1> The `number_one` field inherits the index-level setting.
<2> The `number_two` field overrides the index-level setting to turn off `ignore_malformed`.

View File

@ -0,0 +1,83 @@
[[include-in-all]]
=== `include_in_all`
The `include_in_all` parameter provides per-field control over which fields
are included in the <<mapping-all-field,`_all`>> field. It defaults to `true`, unless <<mapping-index,`index`>> is set to `no`.
This example demonstrates how to exclude the `date` field from the `_all` field:
[source,js]
--------------------------------
PUT my_index
{
"mappings": {
"my_type": {
"properties": {
"title": { <1>
"type": "string"
},
"content": { <1>
"type": "string"
},
"date": { <2>
"type": "date",
"include_in_all": false
}
}
}
}
}
--------------------------------
// AUTOSENSE
<1> The `title` and `content` fields will be included in the `_all` field.
<2> The `date` field will not be included in the `_all` field.
The `include_in_all` parameter can also be set at the type level and on
<<object,`object`>> or <<nested,`nested`>> fields, in which case all
sub-fields inherit that setting. For instance:
[source,js]
--------------------------------
PUT my_index
{
"mappings": {
"my_type": {
"include_in_all": false, <1>
"properties": {
"title": { "type": "string" },
"author": {
"include_in_all": true, <2>
"properties": {
"first_name": { "type": "string" },
"last_name": { "type": "string" }
}
},
"editor": {
"properties": {
"first_name": { "type": "string" }, <3>
"last_name": { "type": "string", "include_in_all": true } <3>
}
}
}
}
}
}
--------------------------------
// AUTOSENSE
<1> All fields in `my_type` are excluded from `_all`.
<2> The `author.first_name` and `author.last_name` fields are included in `_all`.
<3> Only the `editor.last_name` field is included in `_all`.
The `editor.first_name` inherits the type-level setting and is excluded.
[NOTE]
.Multi-fields and `include_in_all`
=================================
The original field value is added to the `_all` field, not the terms produced
by a field's analyzer. For this reason, it makes no sense to set
`include_in_all` to `true` on <<multi-fields,multi-fields>>, as each
multi-field has exactly the same value as its parent.
=================================

View File

@ -0,0 +1,70 @@
[[index-options]]
=== `index_options`
The `index_options` parameter controls what information is added to the
inverted index, for search and highlighting purposes. It accepts the
following settings:
[horizontal]
`docs`::
Only the doc number is indexed. Can answer the question _Does this term
exist in this field?_
`freqs`::
Doc number and term frequencies are indexed. Term frequencies are used to
score repeated terms higher than single terms.
`positions`::
Doc number, term frequencies, and term positions (or order) are indexed.
Positions can be used for
<<query-dsl-match-query-phrase,proximity or phrase queries>>.
`offsets`::
Doc number, term frequencies, positions, and start and end character
offsets (which map the term back to the original string) are indexed.
Offsets are used by the <<postings-highlighter,postings highlighter>>.
<<mapping-index,Analyzed>> string fields use `positions` as the default, and
all other fields use `docs` as the default.
[source,js]
--------------------------------------------------
PUT my_index
{
"mappings": {
"my_type": {
"properties": {
"text": {
"type": "string",
"index_options": "offsets"
}
}
}
}
}
PUT my_index/my_type/1
{
"text": "Quick brown fox"
}
GET my_index/_search
{
"query": {
"match": {
"text": "brown fox"
}
},
"highlight": {
"fields": {
"text": {} <1>
}
}
}
--------------------------------------------------
// AUTOSENSE
<1> The `text` field will use the postings highlighter by default because `offsets` are indexed.

View File

@ -0,0 +1,48 @@
[[mapping-index]]
=== `index`
The `index` option controls how field values are indexed and, thus, how they
are searchable. It accepts three values:
[horizontal]
`no`::
Do not add this field value to the index. With this setting, the field
will not be queryable.
`not_analyzed`::
Add the field value to the index unchanged, as a single term. This is the
default for all fields that support this option except for
<<string,`string`>> fields. `not_analyzed` fields are usually used with
<<term-level-queries,term-level queries>> for structured search.
`analyzed`::
This option applies only to `string` fields, for which it is the default.
The string field value is first <<analysis,analyzed>> to convert the
string into terms (e.g. a list of individual words), which are then
indexed. At search time, the query string is passed through
(<<search-analyzer,usually>>) the same analyzer to generate terms
in the same format as those in the index. It is this process that enables
<<full-text-queries,full text search>>.
For example, you can create a `not_analyzed` string field with the following:
[source,js]
--------------------------------------------------
PUT /my_index
{
"mappings": {
"my_type": {
"properties": {
"status_code": {
"type": "string",
"index": "not_analyzed"
}
}
}
}
}
--------------------------------------------------
// AUTOSENSE

View File

@ -0,0 +1,63 @@
[[lat-lon]]
=== `lat_lon`
<<geo-queries,Geo-queries>> are usually performed by plugging the value of
each <<geo-point,`geo_point`>> field into a formula to determine whether it
falls into the required area or not. Unlike most queries, the inverted index
is not involved.
Setting `lat_lon` to `true` causes the latitude and longitude values to be
indexed as numeric fields (called `.lat` and `.lon`). These fields can be used
by the <<query-dsl-geo-bounding-box-query,`geo_bounding_box`>> and
<<query-dsl-geo-distance-query,`geo_distance`>> queries instead of
performing in-memory calculations.
[source,js]
--------------------------------------------------
PUT my_index
{
"mappings": {
"my_type": {
"properties": {
"location": {
"type": "geo_point",
"lat_lon": true <1>
}
}
}
}
}
PUT my_index/my_type/1
{
"location": {
"lat": 41.12,
"lon": -71.34
}
}
GET my_index/_search
{
"query": {
"geo_distance": {
"location": {
"lat": 41,
"lon": -71
},
"distance": "50km",
"optimize_bbox": "indexed" <2>
}
}
}
--------------------------------------------------
// AUTOSENSE
<1> Setting `lat_lon` to true indexes the geo-point in the `location.lat` and `location.lon` fields.
<2> The `indexed` option tells the geo-distance query to use the inverted index instead of the in-memory calculation.
Whether the in-memory or indexed operation performs better depends both on
your dataset and on the types of queries that you are running.
NOTE: The `lat_lon` option only makes sense for single-value `geo_point`
fields. It will not work with arrays of geo-points.

View File

@ -0,0 +1,132 @@
[[multi-fields]]
=== `fields`
It is often useful to index the same field in different ways for different
purposes. This is the purpose of _multi-fields_. For instance, a `string`
field could be <<mapping-index,indexed>> as an `analyzed` field for full-text
search, and as a `not_analyzed` field for sorting or aggregations:
[source,js]
--------------------------------------------------
PUT /my_index
{
"mappings": {
"my_type": {
"properties": {
"city": {
"type": "string",
"fields": {
"raw": { <1>
"type": "string",
"index": "not_analyzed"
}
}
}
}
}
}
}
PUT /my_index/my_type/1
{
"city": "New York"
}
PUT /my_index/my_type/2
{
"city": "York"
}
GET /my_index/_search
{
"query": {
"match": {
"city": "york" <2>
}
},
"sort": {
"city.raw": "asc" <3>
},
"aggs": {
"Cities": {
"terms": {
"field": "city.raw" <3>
}
}
}
}
--------------------------------------------------
// AUTOSENSE
<1> The `city.raw` field is a `not_analyzed` version of the `city` field.
<2> The analyzed `city` field can be used for full text search.
<3> The `city.raw` field can be used for sorting and aggregations.
NOTE: Multi-fields do not change the original `_source` field.
==== Multi-fields with multiple analyzers
Another use case of multi-fields is to analyze the same field in different
ways for better relevance. For instance we could index a field with the
<<analysis-standard-analyzer,`standard` analyzer>> which breaks text up into
words, and again with the <<english-analyzer,`english` analyzer>>
which stems words into their root form:
[source,js]
--------------------------------------------------
PUT my_index
{
"mappings": {
"my_type": {
"properties": {
"text": { <1>
"type": "string"
},
"fields": {
"english": { <2>
"type": "string",
"analyzer": "english"
}
}
}
}
}
}
PUT my_index/my_type/1
{ "text": "quick brown fox" } <3>
PUT my_index/my_type/2
{ "text": "quick brown foxes" } <3>
GET my_index/_search
{
"query": {
"multi_match": {
"query": "quick brown foxes",
"fields": [ <4>
"text",
"text.english"
],
"type": "most_fields" <4>
}
}
}
--------------------------------------------------
// AUTOSENSE
<1> The `text` field uses the `standard` analyzer.
<2> The `text.english` field uses the `english` analyzer.
<3> Index two documents, one with `fox` and the other with `foxes`.
<4> Query both the `text` and `text.english` fields and combine the scores.
The `text` field contains the term `fox` in the first document and `foxes` in
the second document. The `text.english` field contains `fox` for both
documents, because `foxes` is stemmed to `fox`.
The query string is also analyzed by the `standard` analyzer for the `text`
field, and by the `english` analyzer for the `text.english` field. The
stemmed field allows a query for `foxes` to also match the document containing
just `fox`. This allows us to match as many documents as possible. By also
querying the unstemmed `text` field, we improve the relevance score of the
document which matches `foxes` exactly.

View File

@ -0,0 +1,64 @@
[[norms]]
=== `norms`
Norms store various normalization factors -- a number to represent the
relative field length and the <<index-boost,index time `boost`>> setting --
that are later used at query time in order to compute the score of a document
relatively to a query.
Although useful for scoring, norms also require quite a lot of memory
(typically in the order of one byte per document per field in your index, even
for documents that don't have this specific field). As a consequence, if you
don't need scoring on a specific field, you should disable norms on that
field. In particular, this is the case for fields that are used solely for
filtering or aggregations.
Norms can be disabled (but not reenabled) after the fact, using the
<<indices-put-mapping,PUT mapping API>> like so:
[source,js]
------------
PUT my_index/_mapping/my_type
{
"properties": {
"title": {
"type": "string",
"norms": {
"enabled": false
}
}
}
}
------------
// AUTOSENSE
NOTE: Norms will not be removed instantly, but will be removed as old segments
are merged into new segments as you continue indexing new documents. Any score
computation on a field that has had norms removed might return inconsistent
results since some documents won't have norms anymore while other documents
might still have norms.
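Norms can also be disabled up front, when the field is first created. A
minimal sketch, assuming a brand new index (the `my_index_2` name is only
illustrative):
[source,js]
------------
PUT my_index_2
{
  "mappings": {
    "my_type": {
      "properties": {
        "title": {
          "type": "string",
          "norms": {
            "enabled": false <1>
          }
        }
      }
    }
  }
}
------------
// AUTOSENSE
<1> Norms are never written for this field, so no memory is spent on them.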
==== Lazy loading of norms
Norms can be loaded into memory eagerly (`eager`), whenever a new segment
comes online, or they can be loaded lazily (`lazy`, default), only when the field
is queried.
Eager loading can be configured as follows:
[source,js]
------------
PUT my_index/_mapping/my_type
{
"properties": {
"title": {
"type": "string",
"norms": {
"loading": "eager"
}
}
}
}
------------
// AUTOSENSE
View File
@ -0,0 +1,58 @@
[[null-value]]
=== `null_value`
A `null` value cannot be indexed or searched. When a field is set to `null`
(or an empty array or an array of `null` values), it is treated as though that
field has no values.
The `null_value` parameter allows you to replace explicit `null` values with
the specified value so that it can be indexed and searched. For instance:
[source,js]
--------------------------------------------------
PUT my_index
{
"mappings": {
"my_type": {
"properties": {
"status_code": {
"type": "string",
"index": "not_analyzed",
"null_value": "NULL" <1>
}
}
}
}
}
PUT my_index/my_type/1
{
"status_code": null
}
PUT my_index/my_type/2
{
"status_code": [] <2>
}
GET my_index/_search
{
"query": {
"term": {
"status_code": "NULL" <3>
}
}
}
--------------------------------------------------
// AUTOSENSE
<1> Replace explicit `null` values with the term `NULL`.
<2> An empty array does not contain an explicit `null`, and so won't be replaced with the `null_value`.
<3> A query for `NULL` returns document 1, but not document 2.
IMPORTANT: The `null_value` needs to be the same datatype as the field. For
instance, a `long` field cannot have a string `null_value`. String fields
which are `analyzed` will also pass the `null_value` through the configured
analyzer.
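For example, a minimal sketch of the datatype rule above for a numeric field
(the index, field name, and the replacement value `0` are only illustrative):
[source,js]
--------------------------------------------------
PUT my_index_2
{
  "mappings": {
    "my_type": {
      "properties": {
        "response_code": {
          "type": "long",
          "null_value": 0 <1>
        }
      }
    }
  }
}
--------------------------------------------------
// AUTOSENSE
<1> A `long` field needs a numeric `null_value`; a string value such as `"NULL"` would be rejected here.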
Also see the <<query-dsl-missing-query,`missing` query>> for its `null_value` support.
View File
@ -0,0 +1,68 @@
[[position-offset-gap]]
=== `position_offset_gap`
<<mapping-index,Analyzed>> string fields take term <<index-options,positions>>
into account, in order to be able to support
<<query-dsl-match-query-phrase,proximity or phrase queries>>.
When indexing an array of strings, each string of the array is indexed
directly after the previous one, almost as though all the strings in the array
had been concatenated into one big string.
This can result in matches from phrase queries spanning two array elements.
For instance:
[source,js]
--------------------------------------------------
PUT /my_index/groups/1
{
"names": [ "John Abraham", "Lincoln Smith"]
}
GET /my_index/groups/_search
{
"query": {
"match_phrase": {
"names": "Abraham Lincoln" <1>
}
}
}
--------------------------------------------------
// AUTOSENSE
<1> This phrase query matches our document, even though `Abraham` and `Lincoln` are in separate strings.
The `position_offset_gap` can introduce a fake gap between each array element. For instance:
[source,js]
--------------------------------------------------
PUT my_index
{
"mappings": {
"my_type": {
"properties": {
"names": {
"type": "string",
"position_offset_gap": 50 <1>
}
}
}
}
}
PUT /my_index/groups/1
{
"names": [ "John Abraham", "Lincoln Smith"]
}
GET /my_index/groups/_search
{
"query": {
"match_phrase": {
"names": "Abraham Lincoln" <2>
}
}
}
--------------------------------------------------
// AUTOSENSE
<1> The first term in the next array element will be 50 terms apart from the
last term in the previous array element.
<2> The phrase query no longer matches our document.
View File
@ -0,0 +1,56 @@
[[precision-step]]
=== `precision_step`
Most <<number,numeric>> datatypes index extra terms representing numeric
ranges for each number to make <<query-dsl-range-query,`range` queries>>
faster. For instance, this `range` query:
[source,js]
--------------------------------------------------
"range": {
"number": {
"gte": 0
"lte": 321
}
}
--------------------------------------------------
might be executed internally as a <<query-dsl-terms-query,`terms` query>> that
looks something like this:
[source,js]
--------------------------------------------------
"terms": {
"number": [
"0-255",
"256-319"
"320",
"321"
]
}
--------------------------------------------------
These extra terms greatly reduce the number of terms that have to be examined,
at the cost of increased disk space.
The default value for `precision_step` depends on the `type` of the numeric field:
[horizontal]
`long`, `double`, `date`, `ip`:: `16` (3 extra terms)
`integer`, `float`, `short`:: `8` (3 extra terms)
`byte`:: `2147483647` (0 extra terms)
`token_count`:: `32` (0 extra terms)
The value of the `precision_step` setting indicates the number of bits that
should be compressed into an extra term. A `long` value consists of 64 bits,
so a `precision_step` of 16 results in the following terms:
[horizontal]
Bits 0-15:: `value & 1111111111111111 0000000000000000 0000000000000000 0000000000000000`
Bits 0-31:: `value & 1111111111111111 1111111111111111 0000000000000000 0000000000000000`
Bits 0-47:: `value & 1111111111111111 1111111111111111 1111111111111111 0000000000000000`
Bits 0-63:: `value`
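The default can be overridden per field when a different trade-off between
index size and `range` query speed is needed. A minimal sketch (the index,
field name, and the value `8` are only illustrative):
[source,js]
--------------------------------------------------
PUT my_index_2
{
  "mappings": {
    "my_type": {
      "properties": {
        "number": {
          "type": "long",
          "precision_step": 8 <1>
        }
      }
    }
  }
}
--------------------------------------------------
// AUTOSENSE
<1> Index a prefix term for every 8 bits instead of the default 16, producing more (but finer-grained) extra terms.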
View File
@ -0,0 +1,101 @@
[[properties]]
=== `properties`
Type mappings, <<object,`object` fields>> and <<nested,`nested` fields>>
contain sub-fields, called `properties`. These properties may be of any
<<mapping-types,datatype>>, including `object` and `nested`. Properties can
be added:
* explicitly by defining them when <<indices-create-index,creating an index>>.
* explicitly by defining them when adding or updating a mapping type with the <<indices-put-mapping,PUT mapping>> API.
* <<dynamic-mapping,dynamically>> just by indexing documents containing new fields.
Below is an example of adding `properties` to a mapping type, an `object`
field, and a `nested` field:
[source,js]
--------------------------------------------------
PUT my_index
{
"mappings": {
"my_type": { <1>
"properties": {
"manager": { <2>
"properties": {
"age": { "type": "integer" },
"name": { "type": "string" }
}
},
"employees": { <3>
"type": "nested",
"properties": {
"age": { "type": "integer" },
"name": { "type": "string" }
}
}
}
}
}
}
PUT my_index/my_type/1 <4>
{
"region": "US",
"manager": {
"name": "Alice White",
"age": 30
},
"employees": [
{
"name": "John Smith",
"age": 34
},
{
"name": "Peter Brown",
"age": 26
}
]
}
--------------------------------------------------
// AUTOSENSE
<1> Properties under the `my_type` mapping type.
<2> Properties under the `manager` object field.
<3> Properties under the `employees` nested field.
<4> An example document which corresponds to the above mapping.
==== Dot notation
Inner fields can be referred to in queries, aggregations, etc., using _dot
notation_:
[source,js]
--------------------------------------------------
GET my_index/_search
{
"query": {
"match": {
"manager.name": "Alice White" <1>
}
},
"aggs": {
"Employees": {
"nested": {
"path": "employees"
},
"aggs": {
"Employee Ages": {
"histogram": {
"field": "employees.age", <2>
"interval": 5
}
}
}
}
}
}
--------------------------------------------------
// AUTOSENSE
IMPORTANT: The full path to the inner field must be specified.
View File
@ -0,0 +1,79 @@
[[search-analyzer]]
=== `search_analyzer`
Usually, the same <<analyzer,analyzer>> should be applied at index time and at
search time, to ensure that the terms in the query are in the same format as
the terms in the inverted index.
Sometimes, though, it can make sense to use a different analyzer at search
time, such as when using the <<analysis-edgengram-tokenizer,`edge_ngram`>>
tokenizer for autocomplete.
By default, queries will use the `analyzer` defined in the field mapping, but
this can be overridden with the `search_analyzer` setting:
[source,js]
--------------------------------------------------
PUT /my_index
{
"settings": {
"analysis": {
"filter": {
"autocomplete_filter": {
"type": "edge_ngram",
"min_gram": 1,
"max_gram": 20
}
},
"analyzer": {
"autocomplete": { <1>
"type": "custom",
"tokenizer": "standard",
"filter": [
"lowercase",
"autocomplete_filter"
]
}
}
}
},
"mappings": {
"my_type": {
"properties": {
"text": {
"type": "string",
"analyzer": "autocomplete", <2>
"search_analyzer": "standard" <2>
}
}
}
}
}
PUT my_index/my_type/1
{
"text": "Quick Brown Fox" <3>
}
GET my_index/_search
{
"query": {
"match": {
"text": {
"query": "Quick Br", <4>
"operator": "and"
}
}
}
}
--------------------------------------------------
// AUTOSENSE
<1> Analysis settings to define the custom `autocomplete` analyzer.
<2> The `text` field uses the `autocomplete` analyzer at index time, but the `standard` analyzer at search time.
<3> This field is indexed as the terms: [ `q`, `qu`, `qui`, `quic`, `quick`, `b`, `br`, `bro`, `brow`, `brown`, `f`, `fo`, `fox` ]
<4> The query searches for both of these terms: [ `quick`, `br` ]
See {defguide}/_index_time_search_as_you_type.html[Index time search-as-you-type] for a full explanation of this example.
View File
@ -0,0 +1,54 @@
[[similarity]]
=== `similarity`
Elasticsearch allows you to configure a scoring algorithm or _similarity_ per
field. The `similarity` setting provides a simple way of choosing a similarity
algorithm other than the default TF/IDF, such as `BM25`.
Similarities are mostly useful for <<string,`string`>> fields, especially
`analyzed` string fields, but can also apply to other field types.
Custom similarities can be configured by tuning the parameters of the built-in
similarities. For more details about these expert options, see the
<<index-modules-similarity,similarity module>>.
The only similarities which can be used out of the box, without any further
configuration are:
`default`::
The Default TF/IDF algorithm used by Elasticsearch and
Lucene. See {defguide}/practical-scoring-function.html[Lucene's Practical Scoring Function]
for more information.
`BM25`::
The Okapi BM25 algorithm.
See {defguide}/pluggable-similarites.html[Pluggable Similarity Algorithms]
for more information.
The `similarity` can be set on the field level when a field is first created,
as follows:
[source,js]
--------------------------------------------------
PUT my_index
{
"mappings": {
"my_type": {
"properties": {
"default_field": { <1>
"type": "string"
},
"bm25_field": {
"type": "string",
"similarity": "BM25" <2>
}
}
}
}
}
--------------------------------------------------
// AUTOSENSE
<1> The `default_field` uses the `default` similarity (i.e. TF/IDF).
<2> The `bm25_field` uses the `BM25` similarity.
View File
@ -0,0 +1,73 @@
[[mapping-store]]
=== `store`
By default, field values are <<mapping-index,indexed>> to make them searchable,
but they are not _stored_. This means that the field can be queried, but the
original field value cannot be retrieved.
Usually this doesn't matter. The field value is already part of the
<<mapping-source-field,`_source` field>>, which is stored by default. If you
only want to retrieve the value of a single field or of a few fields, instead
of the whole `_source`, then this can be achieved with
<<search-request-source-filtering,source filtering>>.
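For example, a minimal sketch of such a request, reusing the `title` and
`date` fields from the example below (the index name is illustrative):
[source,js]
--------------------------------------------------
GET my_index/_search
{
  "_source": [ "title", "date" ] <1>
}
--------------------------------------------------
// AUTOSENSE
<1> Only these parts of the `_source` field are extracted and returned for each hit.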
In certain situations it can make sense to `store` a field. For instance, if
you have a document with a `title`, a `date`, and a very large `content`
field, you may want to retrieve just the `title` and the `date` without having
to extract those fields from a large `_source` field:
[source,js]
--------------------------------------------------
PUT /my_index
{
"mappings": {
"my_type": {
"properties": {
"title": {
"type": "string",
"store": true <1>
},
"date": {
"type": "date",
"store": true <1>
},
"content": {
"type": "string"
}
}
}
}
}
PUT /my_index/my_type/1
{
"title": "Some short title",
"date": "2015-01-01",
"content": "A very long content field..."
}
GET my_index/_search
{
"fields": [ "title", "date" ] <2>
}
--------------------------------------------------
// AUTOSENSE
<1> The `title` and `date` fields are stored.
<2> This request will retrieve the values of the `title` and `date` fields.
[NOTE]
.Stored fields returned as arrays
======================================
For consistency, stored fields are always returned as an _array_ because there
is no way of knowing if the original field value was a single value, multiple
values, or an empty array.
If you need the original value, you should retrieve it from the `_source`
field instead.
======================================
Another situation where it can make sense to store a field is for fields that
don't appear in the `_source` field (such as <<copy-to,`copy_to` fields>>).
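As a rough sketch of that last case (the index, the field names, and the
`copy_to` wiring are purely illustrative), the destination of a `copy_to` is
not part of `_source`, so storing it makes its value retrievable:
[source,js]
--------------------------------------------------
PUT my_index_2
{
  "mappings": {
    "my_type": {
      "properties": {
        "first_name": {
          "type": "string",
          "copy_to": "full_name" <1>
        },
        "last_name": {
          "type": "string",
          "copy_to": "full_name" <1>
        },
        "full_name": {
          "type": "string",
          "store": true <2>
        }
      }
    }
  }
}
--------------------------------------------------
// AUTOSENSE
<1> The values of `first_name` and `last_name` are copied into `full_name` at index time.
<2> Because `full_name` never appears in `_source`, it is stored so that its value can be returned via the `fields` parameter.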
Some files were not shown because too many files have changed in this diff