Merge branch 'master' into integ_randomization

commit d1a5068b77

@@ -14,7 +14,7 @@ docs/html/
 docs/build.log
 /tmp/
 backwards/

 html_docs
 ## eclipse ignores (use 'mvn eclipse:eclipse' to build eclipse projects)
 ## All files (.project, .classpath, .settings/*) should be generated through Maven which
 ## will correctly set the classpath based on the declared dependencies and write settings

@@ -33,6 +33,7 @@ import org.apache.lucene.search.Query;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.InPlaceMergeSorter;
+import org.apache.lucene.util.ToStringUtils;

 import java.io.IOException;
 import java.util.Arrays;
@@ -62,13 +63,17 @@ import java.util.List;
 public abstract class BlendedTermQuery extends Query {

     private final Term[] terms;
+    private final float[] boosts;

-    public BlendedTermQuery(Term[] terms) {
+    public BlendedTermQuery(Term[] terms, float[] boosts) {
         if (terms == null || terms.length == 0) {
             throw new IllegalArgumentException("terms must not be null or empty");
         }
+        if (boosts != null && boosts.length != terms.length) {
+            throw new IllegalArgumentException("boosts must have the same size as terms");
+        }
         this.terms = terms;
+        this.boosts = boosts;
     }

     @Override
@@ -231,8 +236,22 @@ public abstract class BlendedTermQuery extends Query {

     @Override
     public String toString(String field) {
-        return "blended(terms: " + Arrays.toString(terms) + ")";
+        StringBuilder builder = new StringBuilder("blended(terms:[");
+        for (int i = 0; i < terms.length; ++i) {
+            builder.append(terms[i]);
+            float boost = 1f;
+            if (boosts != null) {
+                boost = boosts[i];
+            }
+            builder.append(ToStringUtils.boost(boost));
+            builder.append(", ");
+        }
+        if (terms.length > 0) {
+            builder.setLength(builder.length() - 2);
+        }
+        builder.append("])");
+        builder.append(ToStringUtils.boost(getBoost()));
+        return builder.toString();
     }

     private volatile Term[] equalTerms = null;
@@ -277,7 +296,7 @@ public abstract class BlendedTermQuery extends Query {
     }

     public static BlendedTermQuery booleanBlendedQuery(Term[] terms, final float[] boosts, final boolean disableCoord) {
-        return new BlendedTermQuery(terms) {
+        return new BlendedTermQuery(terms, boosts) {
             @Override
             protected Query topLevelQuery(Term[] terms, TermContext[] ctx, int[] docFreqs, int maxDoc) {
                 BooleanQuery query = new BooleanQuery(disableCoord);
@@ -294,7 +313,7 @@ public abstract class BlendedTermQuery extends Query {
     }

     public static BlendedTermQuery commonTermsBlendedQuery(Term[] terms, final float[] boosts, final boolean disableCoord, final float maxTermFrequency) {
-        return new BlendedTermQuery(terms) {
+        return new BlendedTermQuery(terms, boosts) {
             @Override
             protected Query topLevelQuery(Term[] terms, TermContext[] ctx, int[] docFreqs, int maxDoc) {
                 BooleanQuery query = new BooleanQuery(true);
@@ -334,7 +353,7 @@ public abstract class BlendedTermQuery extends Query {
     }

     public static BlendedTermQuery dismaxBlendedQuery(Term[] terms, final float[] boosts, final float tieBreakerMultiplier) {
-        return new BlendedTermQuery(terms) {
+        return new BlendedTermQuery(terms, boosts) {
             @Override
             protected Query topLevelQuery(Term[] terms, TermContext[] ctx, int[] docFreqs, int maxDoc) {
                 DisjunctionMaxQuery query = new DisjunctionMaxQuery(tieBreakerMultiplier);

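The new toString walks terms and boosts in lockstep and trims the trailing separator with setLength. A minimal, self-contained sketch of that formatting logic using only the JDK; boostSuffix is a stand-in for Lucene's ToStringUtils.boost, which conventionally emits nothing for the default boost of 1:

import java.util.Arrays;
import java.util.List;

public class BlendedToStringSketch {
    // Stand-in for ToStringUtils.boost: empty for the default boost of 1.
    static String boostSuffix(float boost) {
        return boost == 1f ? "" : "^" + boost;
    }

    static String describe(List<String> terms, float[] boosts) {
        StringBuilder builder = new StringBuilder("blended(terms:[");
        for (int i = 0; i < terms.size(); i++) {
            builder.append(terms.get(i));
            float boost = boosts != null ? boosts[i] : 1f;
            builder.append(boostSuffix(boost));
            builder.append(", ");
        }
        if (!terms.isEmpty()) {
            builder.setLength(builder.length() - 2); // drop the trailing ", "
        }
        return builder.append("])").toString();
    }

    public static void main(String[] args) {
        // prints: blended(terms:[name.first:banon^2.0, name.last:banon^3.0])
        System.out.println(describe(Arrays.asList("name.first:banon", "name.last:banon"),
                new float[]{2f, 3f}));
    }
}
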
@@ -272,13 +272,13 @@ public class TransportClusterHealthAction extends TransportMasterNodeReadAction<
         } catch (IndexNotFoundException e) {
             // one of the specified indices is not there - treat it as RED.
             ClusterHealthResponse response = new ClusterHealthResponse(clusterName.value(), Strings.EMPTY_ARRAY, clusterState,
-                    numberOfPendingTasks, numberOfInFlightFetch, UnassignedInfo.getNumberOfDelayedUnassigned(settings, clusterState),
+                    numberOfPendingTasks, numberOfInFlightFetch, UnassignedInfo.getNumberOfDelayedUnassigned(System.currentTimeMillis(), settings, clusterState),
                     pendingTaskTimeInQueue);
             response.status = ClusterHealthStatus.RED;
             return response;
         }

         return new ClusterHealthResponse(clusterName.value(), concreteIndices, clusterState, numberOfPendingTasks,
-                numberOfInFlightFetch, UnassignedInfo.getNumberOfDelayedUnassigned(settings, clusterState), pendingTaskTimeInQueue);
+                numberOfInFlightFetch, UnassignedInfo.getNumberOfDelayedUnassigned(System.currentTimeMillis(), settings, clusterState), pendingTaskTimeInQueue);
     }
 }

@@ -59,7 +59,7 @@ final class JVMCheck {
     /** Returns an error message to the user for a broken version */
     String getErrorMessage() {
         StringBuilder sb = new StringBuilder();
-        sb.append("Java version: ").append(Constants.JAVA_VERSION);
+        sb.append("Java version: ").append(fullVersion());
         sb.append(" suffers from critical bug ").append(bugUrl);
         sb.append(" which can cause data corruption.");
         sb.append(System.lineSeparator());
@@ -111,7 +111,7 @@ final class JVMCheck {
      */
     static void check() {
         if (Boolean.parseBoolean(System.getProperty(JVM_BYPASS))) {
-            Loggers.getLogger(JVMCheck.class).warn("bypassing jvm version check for version [{}], this can result in data corruption!", Constants.JAVA_VERSION);
+            Loggers.getLogger(JVMCheck.class).warn("bypassing jvm version check for version [{}], this can result in data corruption!", fullVersion());
         } else if ("Oracle Corporation".equals(Constants.JVM_VENDOR)) {
             HotspotBug bug = JVM_BROKEN_HOTSPOT_VERSIONS.get(Constants.JVM_VERSION);
             if (bug != null) {
@@ -135,11 +135,28 @@ final class JVMCheck {
                 StringBuilder sb = new StringBuilder();
                 sb.append("IBM J9 runtimes < 2.8 suffer from several bugs which can cause data corruption.");
                 sb.append(System.lineSeparator());
-                sb.append("Your version: " + Constants.JVM_VERSION);
+                sb.append("Your version: " + fullVersion());
                 sb.append(System.lineSeparator());
                 sb.append("Please upgrade the JVM to a recent IBM JDK");
                 throw new RuntimeException(sb.toString());
             }
         }
     }
+
+    /**
+     * Returns java + jvm version, looks like this:
+     * {@code Oracle Corporation 1.8.0_45 [Java HotSpot(TM) 64-Bit Server VM 25.45-b02]}
+     */
+    static String fullVersion() {
+        StringBuilder sb = new StringBuilder();
+        sb.append(Constants.JAVA_VENDOR);
+        sb.append(" ");
+        sb.append(Constants.JAVA_VERSION);
+        sb.append(" [");
+        sb.append(Constants.JVM_NAME);
+        sb.append(" ");
+        sb.append(Constants.JVM_VERSION);
+        sb.append("]");
+        return sb.toString();
+    }
 }

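fullVersion() above concatenates Lucene's Constants fields. A rough stand-alone equivalent using the JVM's own system properties (an approximation only; the real method reads org.apache.lucene.util.Constants):

public class FullVersionSketch {
    // Approximates JVMCheck.fullVersion() with standard system properties,
    // e.g. "Oracle Corporation 1.8.0_45 [Java HotSpot(TM) 64-Bit Server VM 25.45-b02]".
    static String fullVersion() {
        return System.getProperty("java.vendor") + " "
                + System.getProperty("java.version") + " ["
                + System.getProperty("java.vm.name") + " "
                + System.getProperty("java.vm.version") + "]";
    }

    public static void main(String[] args) {
        System.out.println(fullVersion());
    }
}
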
@@ -57,6 +57,7 @@ public class RoutingService extends AbstractLifecycleComponent<RoutingService> i
     private AtomicBoolean rerouting = new AtomicBoolean();
     private volatile long registeredNextDelaySetting = Long.MAX_VALUE;
     private volatile ScheduledFuture registeredNextDelayFuture;
+    private volatile long unassignedShardsAllocatedTimestamp = 0;

     @Inject
     public RoutingService(Settings settings, ThreadPool threadPool, ClusterService clusterService, AllocationService allocationService) {
@@ -87,6 +88,19 @@ public class RoutingService extends AbstractLifecycleComponent<RoutingService> i
         return this.allocationService;
     }

+    /**
+     * Update the last time the allocator tried to assign unassigned shards
+     *
+     * This is used so that both the GatewayAllocator and RoutingService use a
+     * consistent timestamp for comparing which shards have been delayed to
+     * avoid a race condition where GatewayAllocator thinks the shard should
+     * be delayed and the RoutingService thinks it has already passed the delay
+     * and that the GatewayAllocator has/will handle it.
+     */
+    public void setUnassignedShardsAllocatedTimestamp(long timeInMillis) {
+        this.unassignedShardsAllocatedTimestamp = timeInMillis;
+    }
+
     /**
      * Initiates a reroute.
      */
@@ -108,20 +122,29 @@ public class RoutingService extends AbstractLifecycleComponent<RoutingService> i
         if (nextDelaySetting > 0 && nextDelaySetting < registeredNextDelaySetting) {
             FutureUtils.cancel(registeredNextDelayFuture);
             registeredNextDelaySetting = nextDelaySetting;
-            TimeValue nextDelay = TimeValue.timeValueMillis(UnassignedInfo.findNextDelayedAllocationIn(settings, event.state()));
-            logger.info("delaying allocation for [{}] unassigned shards, next check in [{}]", UnassignedInfo.getNumberOfDelayedUnassigned(settings, event.state()), nextDelay);
-            registeredNextDelayFuture = threadPool.schedule(nextDelay, ThreadPool.Names.SAME, new AbstractRunnable() {
-                @Override
-                protected void doRun() throws Exception {
-                    registeredNextDelaySetting = Long.MAX_VALUE;
-                    reroute("assign delayed unassigned shards");
-                }
+            // We use System.currentTimeMillis here because we want the
+            // next delay from the "now" perspective, rather than the
+            // delay from the last time the GatewayAllocator tried to
+            // assign/delay the shard
+            TimeValue nextDelay = TimeValue.timeValueMillis(UnassignedInfo.findNextDelayedAllocationIn(System.currentTimeMillis(), settings, event.state()));
+            int unassignedDelayedShards = UnassignedInfo.getNumberOfDelayedUnassigned(unassignedShardsAllocatedTimestamp, settings, event.state());
+            if (unassignedDelayedShards > 0) {
+                logger.info("delaying allocation for [{}] unassigned shards, next check in [{}]",
+                        unassignedDelayedShards, nextDelay);
+                registeredNextDelayFuture = threadPool.schedule(nextDelay, ThreadPool.Names.SAME, new AbstractRunnable() {
+                    @Override
+                    protected void doRun() throws Exception {
+                        registeredNextDelaySetting = Long.MAX_VALUE;
+                        reroute("assign delayed unassigned shards");
+                    }

-                @Override
-                public void onFailure(Throwable t) {
-                    logger.warn("failed to schedule/execute reroute post unassigned shard", t);
-                }
-            });
+                    @Override
+                    public void onFailure(Throwable t) {
+                        logger.warn("failed to schedule/execute reroute post unassigned shard", t);
+                        registeredNextDelaySetting = Long.MAX_VALUE;
+                    }
+                });
+            }
         } else {
             logger.trace("no need to schedule reroute due to delayed unassigned, next_delay_setting [{}], registered [{}]", nextDelaySetting, registeredNextDelaySetting);
         }

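The javadoc on setUnassignedShardsAllocatedTimestamp describes the race this change closes: the allocator and the reroute scheduler must judge "has the delay expired?" against the same instant. A minimal sketch of that shared-timestamp pattern, with invented names standing in for GatewayAllocator and RoutingService:

public class DelayRaceSketch {
    // Published by the allocator, read by the reroute scheduler, so both sides
    // answer "has the delay expired?" against the same instant.
    private static volatile long lastAllocationRunMillis = 0;

    static void allocateUnassigned() {
        // Snapshot the clock once per allocation run and publish it first,
        // mirroring GatewayAllocator.allocateUnassigned in the diff below.
        lastAllocationRunMillis = System.currentTimeMillis();
        // ... per-shard delay decisions for this run would use the snapshot ...
    }

    static boolean stillDelayed(long shardUnassignedAtMillis, long delayMillis) {
        long elapsed = lastAllocationRunMillis - shardUnassignedAtMillis;
        return elapsed >= 0 && elapsed < delayMillis;
    }

    public static void main(String[] args) {
        allocateUnassigned();
        long unassignedAt = System.currentTimeMillis() - 5_000; // shard lost its node 5s ago
        System.out.println(stillDelayed(unassignedAt, 10_000)); // true: only 5s of a 10s delay elapsed
    }
}
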
@@ -199,12 +199,12 @@ public class UnassignedInfo implements ToXContent, Writeable<UnassignedInfo> {
     /**
      * The time in millisecond until this unassigned shard can be reassigned.
      */
-    public long getDelayAllocationExpirationIn(Settings settings, Settings indexSettings) {
+    public long getDelayAllocationExpirationIn(long unassignedShardsAllocatedTimestamp, Settings settings, Settings indexSettings) {
         long delayTimeout = getAllocationDelayTimeoutSetting(settings, indexSettings);
         if (delayTimeout == 0) {
             return 0;
         }
-        long delta = System.currentTimeMillis() - timestamp;
+        long delta = unassignedShardsAllocatedTimestamp - timestamp;
         // account for time drift, treat it as no timeout
         if (delta < 0) {
             return 0;
@@ -216,12 +216,12 @@ public class UnassignedInfo implements ToXContent, Writeable<UnassignedInfo> {
     /**
      * Returns the number of shards that are unassigned and currently being delayed.
      */
-    public static int getNumberOfDelayedUnassigned(Settings settings, ClusterState state) {
+    public static int getNumberOfDelayedUnassigned(long unassignedShardsAllocatedTimestamp, Settings settings, ClusterState state) {
         int count = 0;
         for (ShardRouting shard : state.routingTable().shardsWithState(ShardRoutingState.UNASSIGNED)) {
             if (shard.primary() == false) {
                 IndexMetaData indexMetaData = state.metaData().index(shard.getIndex());
-                long delay = shard.unassignedInfo().getDelayAllocationExpirationIn(settings, indexMetaData.getSettings());
+                long delay = shard.unassignedInfo().getDelayAllocationExpirationIn(unassignedShardsAllocatedTimestamp, settings, indexMetaData.getSettings());
                 if (delay > 0) {
                     count++;
                 }
@@ -251,12 +251,12 @@ public class UnassignedInfo implements ToXContent, Writeable<UnassignedInfo> {
     /**
      * Finds the next (closest) delay expiration of an unassigned shard. Returns 0 if there are none.
      */
-    public static long findNextDelayedAllocationIn(Settings settings, ClusterState state) {
+    public static long findNextDelayedAllocationIn(long unassignedShardsAllocatedTimestamp, Settings settings, ClusterState state) {
         long nextDelay = Long.MAX_VALUE;
         for (ShardRouting shard : state.routingTable().shardsWithState(ShardRoutingState.UNASSIGNED)) {
             if (shard.primary() == false) {
                 IndexMetaData indexMetaData = state.metaData().index(shard.getIndex());
-                long nextShardDelay = shard.unassignedInfo().getDelayAllocationExpirationIn(settings, indexMetaData.getSettings());
+                long nextShardDelay = shard.unassignedInfo().getDelayAllocationExpirationIn(unassignedShardsAllocatedTimestamp, settings, indexMetaData.getSettings());
                 if (nextShardDelay > 0 && nextShardDelay < nextDelay) {
                     nextDelay = nextShardDelay;
                 }

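getDelayAllocationExpirationIn now measures elapsed time from the caller-supplied reference instead of the wall clock, and still clamps a negative delta (clock drift, or a reference taken before the shard became unassigned) to "no timeout". The arithmetic in isolation, as a small self-contained sketch that follows the shape of the method above without claiming to be it:

public class DelayExpirationSketch {
    /**
     * Remaining delay before an unassigned shard may be reassigned, following the
     * shape of UnassignedInfo.getDelayAllocationExpirationIn in the diff above.
     */
    static long delayExpirationIn(long referenceTimeMillis, long unassignedTimestampMillis, long delayTimeoutMillis) {
        if (delayTimeoutMillis == 0) {
            return 0;                                    // delayed allocation disabled
        }
        long delta = referenceTimeMillis - unassignedTimestampMillis;
        if (delta < 0) {
            return 0;                                    // account for time drift, treat it as no timeout
        }
        return Math.max(0, delayTimeoutMillis - delta);  // clamped at zero in this sketch
    }

    public static void main(String[] args) {
        long unassignedAt = 1_000;
        System.out.println(delayExpirationIn(5_000, unassignedAt, 10_000)); // 6000: 4s of the 10s delay elapsed
        System.out.println(delayExpirationIn(500, unassignedAt, 10_000));   // 0: reference precedes unassignment
    }
}
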
@@ -128,9 +128,7 @@ public class ScriptScoreFunction extends ScoreFunction {

     @Override
     public boolean needsScores() {
-        // Scripts might use _score so we return true here
-        // TODO: Make scripts able to tell us whether they use scores
-        return true;
+        return script.needsScores();
     }

     @Override

@@ -113,6 +113,10 @@ public class GatewayAllocator extends AbstractComponent {
     }

     public boolean allocateUnassigned(final RoutingAllocation allocation) {
+        // Take a snapshot of the current time and tell the RoutingService
+        // about it, so it will use a consistent timestamp for delays
+        long lastAllocateUnassignedRun = System.currentTimeMillis();
+        this.routingService.setUnassignedShardsAllocatedTimestamp(lastAllocateUnassignedRun);
         boolean changed = false;

         RoutingNodes.UnassignedShards unassigned = allocation.routingNodes().unassigned();
@@ -127,7 +131,7 @@ public class GatewayAllocator extends AbstractComponent {

         changed |= primaryShardAllocator.allocateUnassigned(allocation);
         changed |= replicaShardAllocator.processExistingRecoveries(allocation);
-        changed |= replicaShardAllocator.allocateUnassigned(allocation);
+        changed |= replicaShardAllocator.allocateUnassigned(allocation, lastAllocateUnassignedRun);
         return changed;
     }

@@ -111,6 +111,10 @@ public abstract class ReplicaShardAllocator extends AbstractComponent {
     }

     public boolean allocateUnassigned(RoutingAllocation allocation) {
+        return allocateUnassigned(allocation, System.currentTimeMillis());
+    }
+
+    public boolean allocateUnassigned(RoutingAllocation allocation, long allocateUnassignedTimestapm) {
         boolean changed = false;
         final RoutingNodes routingNodes = allocation.routingNodes();
         final RoutingNodes.UnassignedShards.UnassignedIterator unassignedIterator = routingNodes.unassigned().iterator();
@@ -174,7 +178,7 @@ public abstract class ReplicaShardAllocator extends AbstractComponent {
                 // will anyhow wait to find an existing copy of the shard to be allocated
                 // note: the other side of the equation is scheduling a reroute in a timely manner, which happens in the RoutingService
                 IndexMetaData indexMetaData = allocation.metaData().index(shard.getIndex());
-                long delay = shard.unassignedInfo().getDelayAllocationExpirationIn(settings, indexMetaData.getSettings());
+                long delay = shard.unassignedInfo().getDelayAllocationExpirationIn(allocateUnassignedTimestapm, settings, indexMetaData.getSettings());
                 if (delay > 0) {
                     logger.debug("[{}][{}]: delaying allocation of [{}] for [{}]", shard.index(), shard.id(), shard, TimeValue.timeValueMillis(delay));
 /**

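ReplicaShardAllocator keeps the old single-argument entry point and makes it delegate to an overload that takes the timestamp explicitly. That is a common way to make time-dependent code deterministic for callers that already hold a clock snapshot; a generic sketch of the shape, with illustrative names and a placeholder decision:

public class ExplicitClockOverloadSketch {
    // Callers without a snapshot keep using the short form...
    public boolean allocateUnassigned() {
        return allocateUnassigned(System.currentTimeMillis());
    }

    // ...while callers that already captured a reference time (as GatewayAllocator
    // does above) pass it explicitly, so every decision in one run agrees.
    public boolean allocateUnassigned(long referenceTimeMillis) {
        // placeholder decision for the sketch; real code would compare delays
        return referenceTimeMillis % 2 == 0;
    }

    public static void main(String[] args) {
        ExplicitClockOverloadSketch s = new ExplicitClockOverloadSketch();
        System.out.println(s.allocateUnassigned(42L)); // deterministic for a fixed timestamp
    }
}
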
@@ -228,8 +228,6 @@ public final class ShardGetService extends AbstractIndexShardComponent {
                 if (source.ttl > 0) {
                     value = docMapper.TTLFieldMapper().valueForSearch(source.timestamp + source.ttl);
                 }
-            } else if (field.equals(SizeFieldMapper.NAME) && docMapper.rootMapper(SizeFieldMapper.class).fieldType().stored()) {
-                value = source.source.length();
             } else {
                 if (searchLookup == null) {
                     searchLookup = new SearchLookup(mapperService, null, new String[]{type});

@@ -48,7 +48,6 @@ import org.elasticsearch.index.mapper.internal.IdFieldMapper;
 import org.elasticsearch.index.mapper.internal.IndexFieldMapper;
 import org.elasticsearch.index.mapper.internal.ParentFieldMapper;
 import org.elasticsearch.index.mapper.internal.RoutingFieldMapper;
-import org.elasticsearch.index.mapper.internal.SizeFieldMapper;
 import org.elasticsearch.index.mapper.internal.SourceFieldMapper;
 import org.elasticsearch.index.mapper.internal.TTLFieldMapper;
 import org.elasticsearch.index.mapper.internal.TimestampFieldMapper;
@@ -106,7 +105,6 @@ public class DocumentMapper implements ToXContent {
         this.rootMappers.put(IdFieldMapper.class, new IdFieldMapper(indexSettings, mapperService.fullName(IdFieldMapper.NAME)));
         this.rootMappers.put(RoutingFieldMapper.class, new RoutingFieldMapper(indexSettings, mapperService.fullName(RoutingFieldMapper.NAME)));
         // add default mappers, order is important (for example analyzer should come before the rest to set context.analyzer)
-        this.rootMappers.put(SizeFieldMapper.class, new SizeFieldMapper(indexSettings, mapperService.fullName(SizeFieldMapper.NAME)));
         this.rootMappers.put(IndexFieldMapper.class, new IndexFieldMapper(indexSettings, mapperService.fullName(IndexFieldMapper.NAME)));
         this.rootMappers.put(SourceFieldMapper.class, new SourceFieldMapper(indexSettings));
         this.rootMappers.put(TypeFieldMapper.class, new TypeFieldMapper(indexSettings, mapperService.fullName(TypeFieldMapper.NAME)));
@@ -283,10 +281,6 @@ public class DocumentMapper implements ToXContent {
         return rootMapper(ParentFieldMapper.class);
     }

-    public SizeFieldMapper sizeFieldMapper() {
-        return rootMapper(SizeFieldMapper.class);
-    }
-
     public TimestampFieldMapper timestampFieldMapper() {
         return rootMapper(TimestampFieldMapper.class);
     }
@@ -299,10 +293,6 @@ public class DocumentMapper implements ToXContent {
         return rootMapper(IndexFieldMapper.class);
     }

-    public SizeFieldMapper SizeFieldMapper() {
-        return rootMapper(SizeFieldMapper.class);
-    }
-
     public Query typeFilter() {
         return typeMapper().fieldType().termQuery(type, null);
     }

@@ -20,7 +20,9 @@
 package org.elasticsearch.index.mapper;

 import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.ImmutableSortedMap;
+import com.google.common.collect.Maps;

 import org.elasticsearch.Version;
 import org.elasticsearch.common.Nullable;
 import org.elasticsearch.common.ParseFieldMatcher;
@@ -35,8 +37,6 @@ import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.common.xcontent.XContentFactory;
 import org.elasticsearch.common.xcontent.XContentHelper;
 import org.elasticsearch.common.xcontent.XContentParser;
-import org.elasticsearch.index.AbstractIndexComponent;
-import org.elasticsearch.index.Index;
 import org.elasticsearch.index.analysis.AnalysisService;
 import org.elasticsearch.index.mapper.core.*;
 import org.elasticsearch.index.mapper.geo.GeoPointFieldMapper;
@@ -50,6 +50,7 @@ import org.elasticsearch.index.similarity.SimilarityLookupService;
 import org.elasticsearch.script.Script;
 import org.elasticsearch.script.ScriptService;

+import java.util.Collections;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
@@ -73,6 +74,7 @@ public class DocumentMapperParser {

     private volatile ImmutableMap<String, Mapper.TypeParser> typeParsers;
     private volatile ImmutableMap<String, Mapper.TypeParser> rootTypeParsers;
+    private volatile ImmutableMap<String, Mapper.TypeParser> additionalRootMappers;

     public DocumentMapperParser(@IndexSettings Settings indexSettings, MapperService mapperService, AnalysisService analysisService,
                                 SimilarityLookupService similarityLookupService, ScriptService scriptService) {
@@ -109,7 +111,6 @@ public class DocumentMapperParser {
         typeParsers = typeParsersBuilder.immutableMap();

         rootTypeParsers = new MapBuilder<String, Mapper.TypeParser>()
-                .put(SizeFieldMapper.NAME, new SizeFieldMapper.TypeParser())
                 .put(IndexFieldMapper.NAME, new IndexFieldMapper.TypeParser())
                 .put(SourceFieldMapper.NAME, new SourceFieldMapper.TypeParser())
                 .put(TypeFieldMapper.NAME, new TypeFieldMapper.TypeParser())
@@ -123,6 +124,7 @@ public class DocumentMapperParser {
                 .put(IdFieldMapper.NAME, new IdFieldMapper.TypeParser())
                 .put(FieldNamesFieldMapper.NAME, new FieldNamesFieldMapper.TypeParser())
                 .immutableMap();
+        additionalRootMappers = ImmutableSortedMap.<String, Mapper.TypeParser>of();
         indexVersionCreated = Version.indexCreated(indexSettings);
     }

@@ -139,6 +141,10 @@ public class DocumentMapperParser {
             rootTypeParsers = new MapBuilder<>(rootTypeParsers)
                     .put(type, typeParser)
                     .immutableMap();
+            additionalRootMappers = ImmutableSortedMap.<String, Mapper.TypeParser>naturalOrder()
+                    .putAll(additionalRootMappers)
+                    .put(type, typeParser)
+                    .build();
         }
     }

@@ -204,6 +210,10 @@ public class DocumentMapperParser {
         Mapper.TypeParser.ParserContext parserContext = parserContext();
         // parse RootObjectMapper
         DocumentMapper.Builder docBuilder = doc(indexSettings, (RootObjectMapper.Builder) rootObjectTypeParser.parse(type, mapping, parserContext), mapperService);
+        // Add default mapping for the plugged-in meta mappers
+        for (Map.Entry<String, Mapper.TypeParser> entry : additionalRootMappers.entrySet()) {
+            docBuilder.put((MetadataFieldMapper.Builder<?, ?>) entry.getValue().parse(entry.getKey(), Collections.<String, Object>emptyMap(), parserContext));
+        }
         Iterator<Map.Entry<String, Object>> iterator = mapping.entrySet().iterator();
         // parse DocumentMapper
         while(iterator.hasNext()) {

@@ -182,7 +182,9 @@ public class QueryParseContext {
     }

     public void addNamedQuery(String name, Query query) {
-        namedQueries.put(name, query);
+        if (query != null) {
+            namedQueries.put(name, query);
+        }
     }

     public ImmutableMap<String, Query> copyNamedQueries() {

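The guard matters because a query clause can legitimately parse to null (the new CommonTermsQueryParserTest added later in this commit exercises exactly that with a stop-word-only query), while copyNamedQueries returns a Guava ImmutableMap, and immutable-map copies reject null values. A tiny JDK-only illustration of that failure mode, using Map.copyOf as a stand-in for the Guava copy:

import java.util.HashMap;
import java.util.Map;

public class NullNamedQuerySketch {
    public static void main(String[] args) {
        Map<String, Object> namedQueries = new HashMap<>();
        Object parsedQuery = null;                 // e.g. a common-terms query of only stop words
        if (parsedQuery != null) {                 // the guard added in addNamedQuery
            namedQueries.put("query-name", parsedQuery);
        }
        System.out.println(Map.copyOf(namedQueries)); // fine: {}

        namedQueries.put("query-name", null);      // what the old code allowed
        try {
            Map.copyOf(namedQueries);              // immutable copies reject null values
        } catch (NullPointerException expected) {
            System.out.println("null value rejected: " + expected);
        }
    }
}
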
@@ -85,7 +85,7 @@ public class MultiMatchQuery extends MatchQuery {
             throw new IllegalStateException("No such type: " + type);
         }
         final List<? extends Query> queries = queryBuilder.buildGroupedQueries(type, fieldNames, value, minimumShouldMatch);
-        return queryBuilder.conbineGrouped(queries);
+        return queryBuilder.combineGrouped(queries);
     }

     private QueryBuilder queryBuilder;
@@ -119,7 +119,7 @@ public class MultiMatchQuery extends MatchQuery {
             return parseAndApply(type, field, value, minimumShouldMatch, boostValue);
         }

-        public Query conbineGrouped(List<? extends Query> groupQuery) {
+        public Query combineGrouped(List<? extends Query> groupQuery) {
             if (groupQuery == null || groupQuery.isEmpty()) {
                 return null;
             }
@@ -196,7 +196,7 @@ public class MultiMatchQuery extends MatchQuery {
                     blendedFields = null;
                 }
                 final FieldAndFieldType fieldAndFieldType = group.get(0);
-                Query q = parseGroup(type.matchQueryType(), fieldAndFieldType.field, fieldAndFieldType.boost, value, minimumShouldMatch);
+                Query q = parseGroup(type.matchQueryType(), fieldAndFieldType.field, 1f, value, minimumShouldMatch);
                 if (q != null) {
                     queries.add(q);
                 }

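With the per-group boost replaced by 1f above, the cross_fields path leaves per-field boosts to travel with the terms themselves, matching the Term[]/float[] pair that BlendedTermQuery now accepts (and that testCrossFieldMultiMatchQuery later in this commit asserts via boosted TermQuery clauses). A small sketch of building such parallel arrays from "field^boost" style inputs; this is illustrative only, not the MultiMatchQuery code:

import java.util.Arrays;

public class FieldBoostPairingSketch {
    public static void main(String[] args) {
        String[] fields = {"name.first^2", "name.last^3", "foobar"};
        String value = "banon";

        String[] terms = new String[fields.length];  // stands in for Term[]
        float[] boosts = new float[fields.length];
        for (int i = 0; i < fields.length; i++) {
            int caret = fields[i].indexOf('^');
            String field = caret < 0 ? fields[i] : fields[i].substring(0, caret);
            boosts[i] = caret < 0 ? 1f : Float.parseFloat(fields[i].substring(caret + 1));
            terms[i] = field + ":" + value;
        }
        // terms[i] and boosts[i] travel together, as in BlendedTermQuery(terms, boosts)
        System.out.println(Arrays.toString(terms) + " " + Arrays.toString(boosts));
    }
}
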
@@ -86,6 +86,10 @@ public class NativeScriptEngineService extends AbstractComponent implements Scri
                 script.setLookup(lookup.getLeafSearchLookup(context));
                 return script;
             }
+            @Override
+            public boolean needsScores() {
+                return scriptFactory.needsScores();
+            }
         };
     }

@@ -41,4 +41,11 @@ public interface NativeScriptFactory {
      * @param params The parameters passed to the script. Can be <tt>null</tt>.
      */
     ExecutableScript newScript(@Nullable Map<String, Object> params);
+
+    /**
+     * Indicates if document scores may be needed by the produced scripts.
+     *
+     * @return {@code true} if scores are needed.
+     */
+    boolean needsScores();
 }
@@ -29,4 +29,11 @@ public interface SearchScript {

     LeafSearchScript getLeafSearchScript(LeafReaderContext context) throws IOException;

+    /**
+     * Indicates if document scores may be needed by this {@link SearchScript}.
+     *
+     * @return {@code true} if scores are needed.
+     */
+    boolean needsScores();
+
 }
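The new needsScores() hook lets each script implementation declare whether it reads _score, so callers can skip wiring a Scorer when it is not needed: the expression engine answers from the compiled expression, Groovy conservatively answers true, and native scripts answer per factory. A compact, self-contained sketch of the propagation pattern with invented names, not the Elasticsearch API:

public class NeedsScoresSketch {
    interface SearchScriptSketch {
        double run(double score);
        boolean needsScores();   // the hook added across the script engines in this commit
    }

    // A script that never looks at _score can say so...
    static final SearchScriptSketch CONSTANT = new SearchScriptSketch() {
        @Override public double run(double score) { return 42.0; }
        @Override public boolean needsScores() { return false; }
    };

    // ...and the caller only pays for scoring when a script asks for it.
    static double execute(SearchScriptSketch script) {
        double score = script.needsScores() ? computeScore() : Double.NaN;
        return script.run(score);
    }

    static double computeScore() {
        System.out.println("scoring performed");
        return 1.0;
    }

    public static void main(String[] args) {
        System.out.println(execute(CONSTANT)); // prints 42.0 without "scoring performed"
    }
}
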
@@ -112,7 +112,6 @@ public class ExpressionScriptEngineService extends AbstractComponent implements
             for (String variable : expr.variables) {
                 if (variable.equals("_score")) {
                     bindings.add(new SortField("_score", SortField.Type.SCORE));
-
                 } else if (variable.equals("_value")) {
                     specialValue = new ReplaceableConstValueSource();
                     bindings.add("_value", specialValue);
@@ -173,7 +172,8 @@ public class ExpressionScriptEngineService extends AbstractComponent implements
                 }
             }

-            return new ExpressionSearchScript(compiledScript, bindings, specialValue);
+            final boolean needsScores = expr.getSortField(bindings, false).needsScores();
+            return new ExpressionSearchScript(compiledScript, bindings, specialValue, needsScores);
         } catch (Exception exception) {
             throw new ScriptException("Error during search with " + compiledScript, exception);
         }
@@ -46,14 +46,21 @@ class ExpressionSearchScript implements SearchScript {
     final SimpleBindings bindings;
     final ValueSource source;
     final ReplaceableConstValueSource specialValue; // _value
+    final boolean needsScores;
     Scorer scorer;
     int docid;

-    ExpressionSearchScript(CompiledScript c, SimpleBindings b, ReplaceableConstValueSource v) {
+    ExpressionSearchScript(CompiledScript c, SimpleBindings b, ReplaceableConstValueSource v, boolean needsScores) {
         compiledScript = c;
         bindings = b;
         source = ((Expression)compiledScript.compiled()).getValueSource(bindings);
         specialValue = v;
+        this.needsScores = needsScores;
     }

+    @Override
+    public boolean needsScores() {
+        return needsScores;
+    }
+
     @Override

@@ -168,6 +168,12 @@ public class GroovyScriptEngineService extends AbstractComponent implements Scri
                 }
                 return new GroovyScript(compiledScript, scriptObject, leafLookup, logger);
             }

+            @Override
+            public boolean needsScores() {
+                // TODO: can we reliably know if a groovy script makes use of _score
+                return true;
+            }
         };
     }

@@ -216,8 +216,7 @@ public abstract class ValuesSource {

             @Override
             public boolean needsScores() {
-                // TODO: add a way to know whether scripts are using scores
-                return true;
+                return script.needsScores();
             }
         }

@@ -295,8 +294,7 @@ public abstract class ValuesSource {

             @Override
             public boolean needsScores() {
-                // TODO: add a way to know whether scripts are using scores
-                return true;
+                return script.needsScores();
             }

             @Override
@@ -431,8 +429,7 @@ public abstract class ValuesSource {

             @Override
             public boolean needsScores() {
-                // TODO: add a way to know whether scripts are using scores
-                return true;
+                return script.needsScores();
             }
         }

@@ -451,8 +448,7 @@ public abstract class ValuesSource {

             @Override
             public boolean needsScores() {
-                // TODO: add a way to know whether scripts are using scores
-                return true;
+                return script.needsScores();
             }

             @Override

@@ -34,6 +34,11 @@ public class NativeScript1 extends AbstractSearchScript {
         public ExecutableScript newScript(@Nullable Map<String, Object> params) {
             return new NativeScript1();
         }
+
+        @Override
+        public boolean needsScores() {
+            return false;
+        }
     }

     public static final String NATIVE_SCRIPT_1 = "native_1";
@@ -34,6 +34,11 @@ public class NativeScript2 extends AbstractSearchScript {
         public ExecutableScript newScript(@Nullable Map<String, Object> params) {
             return new NativeScript2();
         }
+
+        @Override
+        public boolean needsScores() {
+            return false;
+        }
     }

     public static final String NATIVE_SCRIPT_2 = "native_2";
@@ -34,6 +34,11 @@ public class NativeScript3 extends AbstractSearchScript {
         public ExecutableScript newScript(@Nullable Map<String, Object> params) {
             return new NativeScript3();
         }
+
+        @Override
+        public boolean needsScores() {
+            return false;
+        }
     }

     public static final String NATIVE_SCRIPT_3 = "native_3";
@@ -34,6 +34,11 @@ public class NativeScript4 extends AbstractSearchScript {
         public ExecutableScript newScript(@Nullable Map<String, Object> params) {
             return new NativeScript4();
         }
+
+        @Override
+        public boolean needsScores() {
+            return false;
+        }
     }

     public static final String NATIVE_SCRIPT_4 = "native_4";

@@ -36,6 +36,11 @@ public class NativeConstantForLoopScoreScript extends AbstractSearchScript {
         public ExecutableScript newScript(@Nullable Map<String, Object> params) {
             return new NativeConstantForLoopScoreScript(params);
         }
+
+        @Override
+        public boolean needsScores() {
+            return false;
+        }
     }

     private NativeConstantForLoopScoreScript(Map<String, Object> params) {
@@ -36,6 +36,11 @@ public class NativeConstantScoreScript extends AbstractSearchScript {
         public ExecutableScript newScript(@Nullable Map<String, Object> params) {
             return new NativeConstantScoreScript();
         }
+
+        @Override
+        public boolean needsScores() {
+            return false;
+        }
     }

     private NativeConstantScoreScript() {
@@ -42,6 +42,11 @@ public class NativeNaiveTFIDFScoreScript extends AbstractSearchScript {
         public ExecutableScript newScript(@Nullable Map<String, Object> params) {
             return new NativeNaiveTFIDFScoreScript(params);
         }
+
+        @Override
+        public boolean needsScores() {
+            return false;
+        }
     }

     private NativeNaiveTFIDFScoreScript(Map<String, Object> params) {
@@ -44,6 +44,11 @@ public class NativePayloadSumNoRecordScoreScript extends AbstractSearchScript {
         public ExecutableScript newScript(@Nullable Map<String, Object> params) {
             return new NativePayloadSumNoRecordScoreScript(params);
         }
+
+        @Override
+        public boolean needsScores() {
+            return false;
+        }
     }

     private NativePayloadSumNoRecordScoreScript(Map<String, Object> params) {
@@ -44,6 +44,11 @@ public class NativePayloadSumScoreScript extends AbstractSearchScript {
         public ExecutableScript newScript(@Nullable Map<String, Object> params) {
             return new NativePayloadSumScoreScript(params);
         }
+
+        @Override
+        public boolean needsScores() {
+            return false;
+        }
     }

     private NativePayloadSumScoreScript(Map<String, Object> params) {

@@ -34,15 +34,18 @@ import org.elasticsearch.cluster.routing.allocation.command.MoveAllocationComman
 import org.elasticsearch.cluster.routing.allocation.decider.Decision;
 import org.elasticsearch.cluster.routing.allocation.decider.DisableAllocationDecider;
 import org.elasticsearch.cluster.routing.allocation.decider.EnableAllocationDecider.Allocation;
+import org.elasticsearch.cluster.routing.allocation.decider.ThrottlingAllocationDecider;
 import org.elasticsearch.common.Priority;
 import org.elasticsearch.common.io.FileSystemUtils;
 import org.elasticsearch.common.logging.ESLogger;
 import org.elasticsearch.common.logging.Loggers;
 import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.common.unit.TimeValue;
 import org.elasticsearch.env.NodeEnvironment;
 import org.elasticsearch.index.shard.ShardId;
 import org.elasticsearch.test.ESIntegTestCase;
 import org.elasticsearch.test.ESIntegTestCase.ClusterScope;
+import org.elasticsearch.test.InternalTestCluster;
 import org.junit.Test;

 import java.nio.file.Path;
@@ -160,6 +163,40 @@ public class ClusterRerouteIT extends ESIntegTestCase {
         rerouteWithAllocateLocalGateway(commonSettings);
     }

+    @Test
+    public void testDelayWithALargeAmountOfShards() throws Exception {
+        Settings commonSettings = settingsBuilder()
+                .put("gateway.type", "local")
+                .put(ThrottlingAllocationDecider.CLUSTER_ROUTING_ALLOCATION_CONCURRENT_RECOVERIES, 1)
+                .build();
+        logger.info("--> starting 4 nodes");
+        String node_1 = internalCluster().startNode(commonSettings);
+        internalCluster().startNode(commonSettings);
+        internalCluster().startNode(commonSettings);
+        internalCluster().startNode(commonSettings);
+
+        assertThat(cluster().size(), equalTo(4));
+        ClusterHealthResponse healthResponse = client().admin().cluster().prepareHealth().setWaitForNodes("4").execute().actionGet();
+        assertThat(healthResponse.isTimedOut(), equalTo(false));
+
+        logger.info("--> create indices");
+        for (int i = 0; i < 25; i++) {
+            client().admin().indices().prepareCreate("test" + i)
+                    .setSettings(settingsBuilder()
+                            .put("index.number_of_shards", 5).put("index.number_of_replicas", 1)
+                            .put("index.unassigned.node_left.delayed_timeout", randomIntBetween(250, 1000) + "ms"))
+                    .execute().actionGet();
+        }
+
+        ensureGreen(TimeValue.timeValueMinutes(1));
+
+        logger.info("--> stopping node1");
+        internalCluster().stopRandomNode(InternalTestCluster.nameFilter(node_1));
+
+        // This might run slowly on older hardware
+        ensureGreen(TimeValue.timeValueMinutes(2));
+    }
+
     private void rerouteWithAllocateLocalGateway(Settings commonSettings) throws Exception {
         logger.info("--> starting 2 nodes");
         String node_1 = internalCluster().startNode(commonSettings);

@@ -28,6 +28,7 @@ import org.elasticsearch.cluster.metadata.MetaData;
 import org.elasticsearch.cluster.node.DiscoveryNodes;
 import org.elasticsearch.cluster.routing.allocation.AllocationService;
 import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.common.unit.TimeValue;
 import org.elasticsearch.test.ESAllocationTestCase;
 import org.elasticsearch.threadpool.ThreadPool;
 import org.junit.After;
@@ -112,6 +113,10 @@ public class RoutingServiceTests extends ESAllocationTestCase {
         ClusterState prevState = clusterState;
         clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder(clusterState.nodes()).remove("node2")).build();
         clusterState = ClusterState.builder(clusterState).routingResult(allocation.reroute(clusterState)).build();
+        // We need to update the routing service's last attempted run to
+        // signal that the GatewayAllocator tried to allocated it but
+        // it was delayed
+        routingService.setUnassignedShardsAllocatedTimestamp(System.currentTimeMillis());
         ClusterState newState = clusterState;

         routingService.clusterChanged(new ClusterChangedEvent("test", newState, prevState));
@@ -125,6 +130,44 @@ public class RoutingServiceTests extends ESAllocationTestCase {
         assertThat(routingService.getRegisteredNextDelaySetting(), equalTo(Long.MAX_VALUE));
     }

+    @Test
+    public void testDelayedUnassignedDoesNotRerouteForNegativeDelays() throws Exception {
+        AllocationService allocation = createAllocationService();
+        MetaData metaData = MetaData.builder()
+                .put(IndexMetaData.builder("test").settings(settings(Version.CURRENT).put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING, "100ms"))
+                        .numberOfShards(1).numberOfReplicas(1))
+                .build();
+        ClusterState clusterState = ClusterState.builder(ClusterName.DEFAULT)
+                .metaData(metaData)
+                .routingTable(RoutingTable.builder().addAsNew(metaData.index("test"))).build();
+        clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder().put(newNode("node1")).put(newNode("node2")).localNodeId("node1").masterNodeId("node1")).build();
+        clusterState = ClusterState.builder(clusterState).routingResult(allocation.reroute(clusterState)).build();
+        // starting primaries
+        clusterState = ClusterState.builder(clusterState).routingResult(allocation.applyStartedShards(clusterState, clusterState.routingNodes().shardsWithState(INITIALIZING))).build();
+        // starting replicas
+        clusterState = ClusterState.builder(clusterState).routingResult(allocation.applyStartedShards(clusterState, clusterState.routingNodes().shardsWithState(INITIALIZING))).build();
+        assertThat(clusterState.routingNodes().hasUnassigned(), equalTo(false));
+        // remove node2 and reroute
+        ClusterState prevState = clusterState;
+        clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder(clusterState.nodes()).remove("node2")).build();
+        clusterState = ClusterState.builder(clusterState).routingResult(allocation.reroute(clusterState)).build();
+        // Set it in the future so the delay will be negative
+        routingService.setUnassignedShardsAllocatedTimestamp(System.currentTimeMillis() + TimeValue.timeValueMinutes(1).millis());
+
+        ClusterState newState = clusterState;
+
+        routingService.clusterChanged(new ClusterChangedEvent("test", newState, prevState));
+        assertBusy(new Runnable() {
+            @Override
+            public void run() {
+                assertThat(routingService.hasReroutedAndClear(), equalTo(false));
+
+                // verify the registration has been updated
+                assertThat(routingService.getRegisteredNextDelaySetting(), equalTo(100L));
+            }
+        });
+    }
+
     private class TestRoutingService extends RoutingService {

         private AtomicBoolean rerouted = new AtomicBoolean();

@@ -273,7 +273,8 @@ public class UnassignedInfoTests extends ESAllocationTestCase {
         assertBusy(new Runnable() {
             @Override
             public void run() {
-                long delay = unassignedInfo.getDelayAllocationExpirationIn(Settings.builder().put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING, "10h").build(), Settings.EMPTY);
+                long delay = unassignedInfo.getDelayAllocationExpirationIn(System.currentTimeMillis(),
+                        Settings.builder().put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING, "10h").build(), Settings.EMPTY);
                 assertThat(delay, greaterThan(0l));
                 assertThat(delay, lessThan(TimeValue.timeValueHours(10).millis()));
             }
@@ -290,7 +291,8 @@ public class UnassignedInfoTests extends ESAllocationTestCase {
         UnassignedInfo unassignedInfo = new UnassignedInfo(RandomPicks.randomFrom(getRandom(), reasons), null);
         long delay = unassignedInfo.getAllocationDelayTimeoutSetting(Settings.builder().put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING, "10h").build(), Settings.EMPTY);
         assertThat(delay, equalTo(0l));
-        delay = unassignedInfo.getDelayAllocationExpirationIn(Settings.builder().put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING, "10h").build(), Settings.EMPTY);
+        delay = unassignedInfo.getDelayAllocationExpirationIn(System.currentTimeMillis(),
+                Settings.builder().put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING, "10h").build(), Settings.EMPTY);
         assertThat(delay, equalTo(0l));
     }

@@ -306,7 +308,8 @@ public class UnassignedInfoTests extends ESAllocationTestCase {
                 .routingTable(RoutingTable.builder().addAsNew(metaData.index("test1")).addAsNew(metaData.index("test2"))).build();
         clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder().put(newNode("node1")).put(newNode("node2"))).build();
         clusterState = ClusterState.builder(clusterState).routingResult(allocation.reroute(clusterState)).build();
-        assertThat(UnassignedInfo.getNumberOfDelayedUnassigned(Settings.builder().put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING, "10h").build(), clusterState), equalTo(0));
+        assertThat(UnassignedInfo.getNumberOfDelayedUnassigned(System.currentTimeMillis(),
+                Settings.builder().put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING, "10h").build(), clusterState), equalTo(0));
         // starting primaries
         clusterState = ClusterState.builder(clusterState).routingResult(allocation.applyStartedShards(clusterState, clusterState.routingNodes().shardsWithState(INITIALIZING))).build();
         // starting replicas
@@ -315,7 +318,8 @@ public class UnassignedInfoTests extends ESAllocationTestCase {
         // remove node2 and reroute
         clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder(clusterState.nodes()).remove("node2")).build();
         clusterState = ClusterState.builder(clusterState).routingResult(allocation.reroute(clusterState)).build();
-        assertThat(clusterState.prettyPrint(), UnassignedInfo.getNumberOfDelayedUnassigned(Settings.builder().put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING, "10h").build(), clusterState), equalTo(2));
+        assertThat(clusterState.prettyPrint(), UnassignedInfo.getNumberOfDelayedUnassigned(System.currentTimeMillis(),
+                Settings.builder().put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING, "10h").build(), clusterState), equalTo(2));
     }

     @Test
@@ -330,7 +334,8 @@ public class UnassignedInfoTests extends ESAllocationTestCase {
                 .routingTable(RoutingTable.builder().addAsNew(metaData.index("test1")).addAsNew(metaData.index("test2"))).build();
         clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder().put(newNode("node1")).put(newNode("node2"))).build();
         clusterState = ClusterState.builder(clusterState).routingResult(allocation.reroute(clusterState)).build();
-        assertThat(UnassignedInfo.getNumberOfDelayedUnassigned(Settings.builder().put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING, "10h").build(), clusterState), equalTo(0));
+        assertThat(UnassignedInfo.getNumberOfDelayedUnassigned(System.currentTimeMillis(),
+                Settings.builder().put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING, "10h").build(), clusterState), equalTo(0));
         // starting primaries
         clusterState = ClusterState.builder(clusterState).routingResult(allocation.applyStartedShards(clusterState, clusterState.routingNodes().shardsWithState(INITIALIZING))).build();
         // starting replicas
@@ -343,7 +348,8 @@ public class UnassignedInfoTests extends ESAllocationTestCase {
         long nextDelaySetting = UnassignedInfo.findSmallestDelayedAllocationSetting(Settings.builder().put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING, "10h").build(), clusterState);
         assertThat(nextDelaySetting, equalTo(TimeValue.timeValueHours(10).millis()));

-        long nextDelay = UnassignedInfo.findNextDelayedAllocationIn(Settings.builder().put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING, "10h").build(), clusterState);
+        long nextDelay = UnassignedInfo.findNextDelayedAllocationIn(System.currentTimeMillis(),
+                Settings.builder().put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING, "10h").build(), clusterState);
         assertThat(nextDelay, greaterThan(TimeValue.timeValueHours(9).millis()));
         assertThat(nextDelay, lessThanOrEqualTo(TimeValue.timeValueHours(10).millis()));
     }

@@ -73,5 +73,10 @@ public class ScriptScoreFunctionTests extends ESTestCase {
                 }
             };
         }

+        @Override
+        public boolean needsScores() {
+            return false;
+        }
     }
 }

@@ -1031,9 +1031,6 @@ public class GetActionIT extends ESIntegTestCase {
                 "    \"doc\": {\n" +
                 "      \"_timestamp\": {\n" +
                 "        \"enabled\": true\n" +
                 "      },\n" +
-                "      \"_size\": {\n" +
-                "        \"enabled\": true\n" +
-                "      }\n" +
                 "    }\n" +
                 "  }\n" +
@@ -1045,7 +1042,7 @@ public class GetActionIT extends ESIntegTestCase {
                 "  \"text\": \"some text.\"\n" +
                 "}\n";
         client().prepareIndex("test", "doc").setId("1").setSource(doc).setRouting("1").get();
-        String[] fieldsList = {"_timestamp", "_size", "_routing"};
+        String[] fieldsList = {"_timestamp", "_routing"};
         // before refresh - document is only in translog
         assertGetFieldsAlwaysWorks(indexOrAlias(), "doc", "1", fieldsList, "1");
         refresh();

@@ -44,8 +44,8 @@ import org.elasticsearch.index.mapper.MapperParsingException;
 import org.elasticsearch.index.mapper.ParseContext.Document;
 import org.elasticsearch.index.mapper.ParsedDocument;
 import org.elasticsearch.index.mapper.internal.AllFieldMapper;
-import org.elasticsearch.index.mapper.internal.SizeFieldMapper;
 import org.elasticsearch.test.ESSingleNodeTestCase;
+import org.elasticsearch.index.mapper.internal.TimestampFieldMapper;
 import org.hamcrest.Matchers;
 import org.junit.Test;

@@ -387,7 +387,7 @@ public class SimpleAllMapperTests extends ESSingleNodeTestCase {
         String mapping = "{";
         Map<String, String> rootTypes = new HashMap<>();
         //just pick some example from DocumentMapperParser.rootTypeParsers
-        rootTypes.put(SizeFieldMapper.NAME, "{\"enabled\" : true}");
+        rootTypes.put(TimestampFieldMapper.NAME, "{\"enabled\" : true}");
         rootTypes.put("include_in_all", "true");
         rootTypes.put("dynamic_date_formats", "[\"yyyy-MM-dd\", \"dd-MM-yyyy\"]");
         rootTypes.put("numeric_detection", "true");

@@ -33,16 +33,16 @@ public class ParseMappingTypeLevelTests extends ESSingleNodeTestCase {
     @Test
     public void testTypeLevel() throws Exception {
         String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
-                .startObject("_size").field("enabled", true).endObject()
+                .startObject("_timestamp").field("enabled", true).endObject()
                 .endObject().endObject().string();

         DocumentMapperParser parser = createIndex("test").mapperService().documentMapperParser();
         DocumentMapper mapper = parser.parse("type", mapping);
         assertThat(mapper.type(), equalTo("type"));
-        assertThat(mapper.sizeFieldMapper().enabled(), equalTo(true));
+        assertThat(mapper.timestampFieldMapper().enabled(), equalTo(true));

         mapper = parser.parse(mapping);
         assertThat(mapper.type(), equalTo("type"));
-        assertThat(mapper.sizeFieldMapper().enabled(), equalTo(true));
+        assertThat(mapper.timestampFieldMapper().enabled(), equalTo(true));
     }
 }

@@ -158,25 +158,6 @@ public class UpdateMappingTests extends ESSingleNodeTestCase {
         assertTrue(documentMapper.timestampFieldMapper().fieldType().stored());
     }

-    @Test
-    public void testSizeParsing() throws IOException {
-        IndexService indexService = createIndex("test", Settings.settingsBuilder().build());
-        XContentBuilder indexMapping = XContentFactory.jsonBuilder();
-        boolean enabled = randomBoolean();
-        indexMapping.startObject()
-                .startObject("type")
-                .startObject("_size")
-                .field("enabled", enabled)
-                .endObject()
-                .endObject()
-                .endObject();
-        DocumentMapper documentMapper = indexService.mapperService().parse("type", new CompressedXContent(indexMapping.string()), true);
-        assertThat(documentMapper.sizeFieldMapper().enabled(), equalTo(enabled));
-        assertTrue(documentMapper.sizeFieldMapper().fieldType().stored());
-        documentMapper = indexService.mapperService().parse("type", new CompressedXContent(documentMapper.mappingSource().string()), true);
-        assertThat(documentMapper.sizeFieldMapper().enabled(), equalTo(enabled));
-    }
-
     @Test
     public void testSizeTimestampIndexParsing() throws IOException {
         IndexService indexService = createIndex("test", Settings.settingsBuilder().build());
@@ -192,7 +173,7 @@ public class UpdateMappingTests extends ESSingleNodeTestCase {
         createIndex("test1", Settings.settingsBuilder().build());
         createIndex("test2", Settings.settingsBuilder().build());
         XContentBuilder defaultMapping = XContentFactory.jsonBuilder().startObject()
-                .startObject(MapperService.DEFAULT_MAPPING).startObject("_size").field("enabled", true).endObject().endObject()
+                .startObject(MapperService.DEFAULT_MAPPING).startObject("_timestamp").field("enabled", true).endObject().endObject()
                 .endObject();
         client().admin().indices().preparePutMapping().setType(MapperService.DEFAULT_MAPPING).setSource(defaultMapping).get();
         XContentBuilder typeMapping = XContentFactory.jsonBuilder().startObject()
@@ -204,7 +185,7 @@ public class UpdateMappingTests extends ESSingleNodeTestCase {
         GetMappingsResponse response = client().admin().indices().prepareGetMappings("test2").get();
         assertNotNull(response.getMappings().get("test2").get("type").getSourceAsMap().get("_all"));
         assertFalse((Boolean) ((LinkedHashMap) response.getMappings().get("test2").get("type").getSourceAsMap().get("_all")).get("enabled"));
-        assertNotNull(response.getMappings().get("test2").get("type").getSourceAsMap().get("_size"));
-        assertTrue((Boolean)((LinkedHashMap)response.getMappings().get("test2").get("type").getSourceAsMap().get("_size")).get("enabled"));
+        assertNotNull(response.getMappings().get("test2").get("type").getSourceAsMap().get("_timestamp"));
+        assertTrue((Boolean)((LinkedHashMap)response.getMappings().get("test2").get("type").getSourceAsMap().get("_timestamp")).get("enabled"));
     }
 }

@@ -1 +1 @@
-{"type":{"_size":{"enabled":false},"_timestamp":{"enabled":false}}}
+{"type":{"_timestamp":{"enabled":false}}}

@@ -0,0 +1,52 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.index.query;
+
+import org.elasticsearch.action.search.SearchResponse;
+import org.elasticsearch.test.ESSingleNodeTestCase;
+import org.junit.Test;
+
+import java.io.IOException;
+
+public class CommonTermsQueryParserTest extends ESSingleNodeTestCase {
+    @Test
+    public void testWhenParsedQueryIsNullNoNullPointerExceptionIsThrown() throws IOException {
+        final String index = "test-index";
+        final String type = "test-type";
+        client()
+                .admin()
+                .indices()
+                .prepareCreate(index)
+                .addMapping(type, "name", "type=string,analyzer=stop")
+                .execute()
+                .actionGet();
+        ensureGreen();
+
+        CommonTermsQueryBuilder commonTermsQueryBuilder =
+                new CommonTermsQueryBuilder("name", "the").queryName("query-name");
+
+        // the named query parses to null; we are testing this does not cause a NullPointerException
+        SearchResponse response =
+                client().prepareSearch(index).setTypes(type).setQuery(commonTermsQueryBuilder).execute().actionGet();
+
+        assertNotNull(response);
+        assertEquals(response.getHits().hits().length, 0);
+    }
+}

@@ -54,6 +54,7 @@ import org.elasticsearch.common.xcontent.XContentFactory;
 import org.elasticsearch.common.xcontent.XContentHelper;
 import org.elasticsearch.common.xcontent.XContentParser;
 import org.elasticsearch.index.IndexService;
+import org.elasticsearch.index.engine.Engine;
 import org.elasticsearch.index.mapper.MapperService;
 import org.elasticsearch.index.mapper.ParsedDocument;
 import org.elasticsearch.index.mapper.core.NumberFieldMapper;
@@ -83,6 +84,7 @@ import static org.hamcrest.Matchers.*;
 public class SimpleIndexQueryParserTests extends ESSingleNodeTestCase {

     private IndexQueryParserService queryParser;
+    private IndexService indexService;

     @Before
     public void setup() throws IOException {
@@ -99,6 +101,7 @@ public class SimpleIndexQueryParserTests extends ESSingleNodeTestCase {
         assertNotNull(doc.dynamicMappingsUpdate());
         client().admin().indices().preparePutMapping("test").setType("person").setSource(doc.dynamicMappingsUpdate().toString()).get();

+        this.indexService = indexService;
         queryParser = indexService.queryParserService();
     }

@@ -2269,6 +2272,23 @@ public class SimpleIndexQueryParserTests extends ESSingleNodeTestCase {
         assertThat(parsedQuery, instanceOf(BooleanQuery.class));
     }

+    public void testCrossFieldMultiMatchQuery() throws IOException {
+        IndexQueryParserService queryParser = queryParser();
+        Query parsedQuery = queryParser.parse(multiMatchQuery("banon", "name.first^2", "name.last^3", "foobar").type(MultiMatchQueryBuilder.Type.CROSS_FIELDS)).query();
+        try (Engine.Searcher searcher = indexService.shardSafe(0).acquireSearcher("test")) {
+            Query rewrittenQuery = searcher.searcher().rewrite(parsedQuery);
+
+            BooleanQuery expected = new BooleanQuery();
+            expected.add(new TermQuery(new Term("foobar", "banon")), Occur.SHOULD);
+            TermQuery tq1 = new TermQuery(new Term("name.first", "banon"));
+            tq1.setBoost(2);
+            TermQuery tq2 = new TermQuery(new Term("name.last", "banon"));
+            tq2.setBoost(3);
+            expected.add(new DisjunctionMaxQuery(Arrays.<Query>asList(tq1, tq2), 0f), Occur.SHOULD);
+            assertEquals(expected, rewrittenQuery);
+        }
+    }
+
     @Test
     public void testSimpleQueryString() throws Exception {
         IndexQueryParserService queryParser = queryParser();

@ -506,37 +506,6 @@ public class PercolatorIT extends ESIntegTestCase {
|
|||
assertThat(percolate.getMatches(), emptyArray());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void percolateWithSizeField() throws Exception {
|
||||
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type1")
|
||||
.startObject("_size").field("enabled", true).endObject()
|
||||
.startObject("properties").startObject("field1").field("type", "string").endObject().endObject()
|
||||
.endObject().endObject().string();
|
||||
|
||||
assertAcked(prepareCreate("test").addMapping("type1", mapping));
|
||||
ensureGreen();
|
||||
|
||||
logger.info("--> register a query");
|
||||
client().prepareIndex("test", PercolatorService.TYPE_NAME, "kuku")
|
||||
.setSource(jsonBuilder().startObject()
|
||||
.field("query", termQuery("field1", "value1"))
|
||||
.endObject())
|
||||
.setRefresh(true)
|
||||
.execute().actionGet();
|
||||
|
||||
logger.info("--> percolate a document");
|
||||
PercolateResponse percolate = client().preparePercolate().setIndices("test").setDocumentType("type1")
|
||||
.setSource(jsonBuilder().startObject()
|
||||
.startObject("doc")
|
||||
.field("field1", "value1")
|
||||
.endObject()
|
||||
.endObject())
|
||||
.execute().actionGet();
|
||||
assertMatchCount(percolate, 1l);
|
||||
assertThat(percolate.getMatches(), arrayWithSize(1));
|
||||
assertThat(convertFromTextArray(percolate.getMatches(), "test"), arrayContaining("kuku"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testPercolateStatistics() throws Exception {
|
||||
client().admin().indices().prepareCreate("test").execute().actionGet();
|
||||
|
|
|
@ -97,6 +97,11 @@ public class NativeScriptTests extends ESTestCase {
|
|||
public ExecutableScript newScript(@Nullable Map<String, Object> params) {
|
||||
return new MyScript();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean needsScores() {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
static class MyScript extends AbstractExecutableScript {
|
||||
|
|
|
@ -81,6 +81,11 @@ public class ScriptFieldIT extends ESIntegTestCase {
|
|||
public ExecutableScript newScript(@Nullable Map<String, Object> params) {
|
||||
return new IntScript();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean needsScores() {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
static class IntScript extends AbstractSearchScript {
|
||||
|
@ -95,6 +100,11 @@ public class ScriptFieldIT extends ESIntegTestCase {
|
|||
public ExecutableScript newScript(@Nullable Map<String, Object> params) {
|
||||
return new LongScript();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean needsScores() {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
static class LongScript extends AbstractSearchScript {
|
||||
|
@ -109,6 +119,11 @@ public class ScriptFieldIT extends ESIntegTestCase {
|
|||
public ExecutableScript newScript(@Nullable Map<String, Object> params) {
|
||||
return new FloatScript();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean needsScores() {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
static class FloatScript extends AbstractSearchScript {
|
||||
|
@ -123,6 +138,11 @@ public class ScriptFieldIT extends ESIntegTestCase {
|
|||
public ExecutableScript newScript(@Nullable Map<String, Object> params) {
|
||||
return new DoubleScript();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean needsScores() {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
static class DoubleScript extends AbstractSearchScript {
|
||||
|
|
|
@ -0,0 +1,57 @@
|
|||
/*
|
||||
* Licensed to Elasticsearch under one or more contributor
|
||||
* license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright
|
||||
* ownership. Elasticsearch licenses this file to you under
|
||||
* the Apache License, Version 2.0 (the "License"); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.script.expression;
|
||||
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.index.IndexService;
|
||||
import org.elasticsearch.script.CompiledScript;
|
||||
import org.elasticsearch.script.ScriptService.ScriptType;
|
||||
import org.elasticsearch.script.SearchScript;
|
||||
import org.elasticsearch.search.lookup.SearchLookup;
|
||||
import org.elasticsearch.test.ESSingleNodeTestCase;
|
||||
|
||||
import java.util.Collections;
|
||||
|
||||
public class ExpressionScriptTests extends ESSingleNodeTestCase {
|
||||
|
||||
public void testNeedsScores() {
|
||||
IndexService index = createIndex("test", Settings.EMPTY, "type", "d", "type=double");
|
||||
|
||||
ExpressionScriptEngineService service = new ExpressionScriptEngineService(Settings.EMPTY);
|
||||
SearchLookup lookup = new SearchLookup(index.mapperService(), index.fieldData(), null);
|
||||
|
||||
Object compiled = service.compile("1.2");
|
||||
SearchScript ss = service.search(new CompiledScript(ScriptType.INLINE, "randomName", "expression", compiled), lookup, Collections.<String, Object>emptyMap());
|
||||
assertFalse(ss.needsScores());
|
||||
|
||||
compiled = service.compile("doc['d'].value");
|
||||
ss = service.search(new CompiledScript(ScriptType.INLINE, "randomName", "expression", compiled), lookup, Collections.<String, Object>emptyMap());
|
||||
assertFalse(ss.needsScores());
|
||||
|
||||
compiled = service.compile("1/_score");
|
||||
ss = service.search(new CompiledScript(ScriptType.INLINE, "randomName", "expression", compiled), lookup, Collections.<String, Object>emptyMap());
|
||||
assertTrue(ss.needsScores());
|
||||
|
||||
compiled = service.compile("doc['d'].value * _score");
|
||||
ss = service.search(new CompiledScript(ScriptType.INLINE, "randomName", "expression", compiled), lookup, Collections.<String, Object>emptyMap());
|
||||
assertTrue(ss.needsScores());
|
||||
}
|
||||
|
||||
}
|
|
@ -35,6 +35,11 @@ public class NativeSignificanceScoreScriptNoParams extends TestScript {
|
|||
public ExecutableScript newScript(@Nullable Map<String, Object> params) {
|
||||
return new NativeSignificanceScoreScriptNoParams();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean needsScores() {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
private NativeSignificanceScoreScriptNoParams() {
|
||||
|
|
|
@ -36,6 +36,11 @@ public class NativeSignificanceScoreScriptWithParams extends TestScript {
|
|||
public ExecutableScript newScript(@Nullable Map<String, Object> params) {
|
||||
return new NativeSignificanceScoreScriptWithParams(params);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean needsScores() {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
private NativeSignificanceScoreScriptWithParams(Map<String, Object> params) {
|
||||
|
|
|
@ -76,9 +76,8 @@ public class SearchFieldsIT extends ESIntegTestCase {
|
|||
client().admin().cluster().prepareHealth().setWaitForEvents(Priority.LANGUID).setWaitForYellowStatus().execute().actionGet();
|
||||
|
||||
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type1")
|
||||
// _timestamp and _size are randomly enabled via templates but we don't want it here to test stored fields behaviour
|
||||
// _timestamp is randomly enabled via templates but we don't want it here to test stored fields behaviour
|
||||
.startObject("_timestamp").field("enabled", false).endObject()
|
||||
.startObject("_size").field("enabled", false).endObject()
|
||||
.startObject("properties")
|
||||
.startObject("field1").field("type", "string").field("store", "yes").endObject()
|
||||
.startObject("field2").field("type", "string").field("store", "no").endObject()
|
||||
|
|
|
@ -102,6 +102,10 @@ public class ExplainableScriptIT extends ESIntegTestCase {
|
|||
public ExecutableScript newScript(@Nullable Map<String, Object> params) {
|
||||
return new MyScript();
|
||||
}
|
||||
@Override
|
||||
public boolean needsScores() {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
static class MyScript extends AbstractDoubleSearchScript implements ExplainableSearchScript, ExecutableScript {
|
||||
|
|
|
@ -102,7 +102,6 @@ import org.elasticsearch.index.fielddata.FieldDataType;
|
|||
import org.elasticsearch.index.mapper.DocumentMapper;
|
||||
import org.elasticsearch.index.mapper.MappedFieldType;
|
||||
import org.elasticsearch.index.mapper.MappedFieldType.Loading;
|
||||
import org.elasticsearch.index.mapper.internal.SizeFieldMapper;
|
||||
import org.elasticsearch.index.mapper.internal.TimestampFieldMapper;
|
||||
import org.elasticsearch.index.shard.MergePolicyConfig;
|
||||
import org.elasticsearch.index.translog.Translog;
|
||||
|
@ -357,11 +356,6 @@ public abstract class ESIntegTestCase extends ESTestCase {
|
|||
.field("enabled", randomBoolean());
|
||||
mappings.endObject();
|
||||
}
|
||||
if (randomBoolean()) {
|
||||
mappings.startObject(SizeFieldMapper.NAME)
|
||||
.field("enabled", randomBoolean())
|
||||
.endObject();
|
||||
}
|
||||
mappings.startArray("dynamic_templates")
|
||||
.startObject()
|
||||
.startObject("template-strings")
|
||||
|
|
|
@ -74,6 +74,10 @@ public class UpdateByNativeScriptIT extends ESIntegTestCase {
|
|||
public ExecutableScript newScript(@Nullable Map<String, Object> params) {
|
||||
return new CustomScript(params);
|
||||
}
|
||||
@Override
|
||||
public boolean needsScores() {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
static class CustomScript extends AbstractExecutableScript {
|
||||
|
|
|
@ -53,7 +53,7 @@ Response:
|
|||
}
|
||||
--------------------------------------------------
|
||||
|
||||
The specified field must be of type `geo_point` (which can only be set explicitly in the mappings). And it can also hold an array of `geo_point` fields, in which case all will be taken into account during aggregation. The origin point can accept all formats supported by the `geo_point` <<mapping-geo-point-type,type>>:
|
||||
The specified field must be of type `geo_point` (which can only be set explicitly in the mappings). And it can also hold an array of `geo_point` fields, in which case all will be taken into account during aggregation. The origin point can accept all formats supported by the <<geo-point,`geo_point` type>>:
|
||||
|
||||
* Object format: `{ "lat" : 52.3760, "lon" : 4.894 }` - this is the safest format as it is the most explicit about the `lat` & `lon` values
|
||||
* String format: `"52.3760, 4.894"` - where the first number is the `lat` and the second is the `lon`
|
||||
|
|
|
@ -200,7 +200,7 @@ and therefore can't be used in the `order` option of the `terms` aggregator.
|
|||
If the `top_hits` aggregator is wrapped in a `nested` or `reverse_nested` aggregator then nested hits are being returned.
|
||||
Nested hits are in a sense hidden mini documents that are part of regular document where in the mapping a nested field type
|
||||
has been configured. The `top_hits` aggregator has the ability to un-hide these documents if it is wrapped in a `nested`
|
||||
or `reverse_nested` aggregator. Read more about nested in the <<mapping-nested-type,nested type mapping>>.
|
||||
or `reverse_nested` aggregator. Read more about nested in the <<nested,nested type mapping>>.
|
||||
|
||||
If nested type has been configured a single document is actually indexed as multiple Lucene documents and they share
|
||||
the same id. In order to determine the identity of a nested hit there is more needed than just the id, so that is why
|
||||
|
|
|
@ -152,6 +152,34 @@ being consumed by a monitoring tool, rather than intended for human
|
|||
consumption. The default for the `human` flag is
|
||||
`false`.
|
||||
|
||||
[[date-math]]
|
||||
[float]
|
||||
=== Date Math
|
||||
|
||||
Most parameters which accept a formatted date value -- such as `gt` and `lt`
|
||||
in <<query-dsl-range-query,`range` queries>>, or `from` and `to`
|
||||
in <<search-aggregations-bucket-daterange-aggregation,`daterange`
|
||||
aggregations>> -- understand date maths.
|
||||
|
||||
The expression starts with an anchor date, which can be either `now` or a
|
||||
date string ending with `||`. This anchor date can optionally be followed by
|
||||
one or more maths expressions:
|
||||
|
||||
* `+1h` - add one hour
|
||||
* `-1d` - subtract one day
|
||||
* `/d` - round down to the nearest day
|
||||
|
||||
The supported <<time-units,time units>> are: `y` (year), `M` (month), `w` (week),
|
||||
`d` (day), `h` (hour), `m` (minute), and `s` (second).
|
||||
|
||||
Some examples are:
|
||||
|
||||
[horizontal]
|
||||
`now+1h`:: The current time plus one hour, with ms resolution.
|
||||
`now+1h+1m`:: The current time plus one hour plus one minute, with ms resolution.
|
||||
`now+1h/d`:: The current time plus one hour, rounded down to the nearest day.
|
||||
`2015-01-01||+1M/d`:: `2015-01-01` plus one month, rounded down to the nearest day.
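As a quick illustration (the index and field names below are hypothetical and
not part of this change), these expressions can be used directly in a `range`
query:

[source,js]
--------------------------------------------------
GET my_index/_search
{
  "query": {
    "range": {
      "timestamp": {
        "gte": "now-1d/d", <1>
        "lt":  "now/d"     <2>
      }
    }
  }
}
--------------------------------------------------
// AUTOSENSE
<1> Rounded down to the start of yesterday.
<2> Rounded down to the start of today (exclusive), so the range covers all of yesterday.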
|
||||
|
||||
[float]
|
||||
=== Response Filtering
|
||||
|
||||
|
@ -237,10 +265,10 @@ curl 'localhost:9200/_segments?pretty&filter_path=indices.**.version'
|
|||
--------------------------------------------------
|
||||
|
||||
Note that elasticsearch sometimes returns directly the raw value of a field,
|
||||
like the `_source` field. If you want to filter _source fields, you should
|
||||
like the `_source` field. If you want to filter `_source` fields, you should
|
||||
consider combining the already existing `_source` parameter (see
|
||||
<<get-source-filtering,Get API>> for more details) with the `filter_path`
|
||||
parameter like this:
|
||||
parameter like this:
|
||||
|
||||
[source,sh]
|
||||
--------------------------------------------------
|
||||
|
@ -318,8 +346,9 @@ of supporting the native JSON number types.
|
|||
[float]
|
||||
=== Time units
|
||||
|
||||
Whenever durations need to be specified, eg for a `timeout` parameter, the duration
|
||||
can be specified as a whole number representing time in milliseconds, or as a time value like `2d` for 2 days. The supported units are:
|
||||
Whenever durations need to be specified, eg for a `timeout` parameter, the
|
||||
duration must specify the unit, like `2d` for 2 days. The supported units
|
||||
are:
|
||||
|
||||
[horizontal]
|
||||
`y`:: Year
|
||||
|
@ -329,6 +358,7 @@ can be specified as a whole number representing time in milliseconds, or as a ti
|
|||
`h`:: Hour
|
||||
`m`:: Minute
|
||||
`s`:: Second
|
||||
`ms`:: Milli-second
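For example (a hypothetical request, not taken from this change), a search
timeout now has to carry its unit explicitly:

[source,js]
--------------------------------------------------
GET /_search?timeout=500ms
{
  "query": { "match_all": {} }
}
--------------------------------------------------
// AUTOSENSE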
|
||||
|
||||
[[distance-units]]
|
||||
[float]
|
||||
|
|
|
@ -6,53 +6,3 @@ added to an index either when creating it or by using the put mapping
|
|||
api. It also handles the dynamic mapping support for types that have no
|
||||
explicit mappings pre defined. For more information about mapping
|
||||
definitions, check out the <<mapping,mapping section>>.
|
||||
|
||||
[float]
|
||||
=== Dynamic Mappings
|
||||
|
||||
New types and new fields within types can be added dynamically just
|
||||
by indexing a document. When Elasticsearch encounters a new type,
|
||||
it creates the type using the `_default_` mapping (see below).
|
||||
|
||||
When it encounters a new field within a type, it autodetects the
|
||||
datatype that the field contains and adds it to the type mapping
|
||||
automatically.
|
||||
|
||||
See <<mapping-dynamic-mapping>> for details of how to control and
|
||||
configure dynamic mapping.
|
||||
|
||||
[float]
|
||||
=== Default Mapping
|
||||
|
||||
When a new type is created (at <<indices-create-index,index creation>> time,
|
||||
using the <<indices-put-mapping,`put-mapping` API>> or just by indexing a
|
||||
document into it), the type uses the `_default_` mapping as its basis. Any
|
||||
mapping specified in the <<indices-create-index,`create-index`>> or
|
||||
<<indices-put-mapping,`put-mapping`>> request override values set in the
|
||||
`_default_` mapping.
|
||||
|
||||
The default mapping definition is a plain mapping definition that is
|
||||
embedded within Elasticsearch:
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
{
|
||||
_default_ : {
|
||||
}
|
||||
}
|
||||
--------------------------------------------------
|
||||
|
||||
Pretty short, isn't it? Basically, everything is `_default_`ed, including the
|
||||
dynamic nature of the root object mapping which allows new fields to be added
|
||||
automatically.
|
||||
|
||||
The default mapping can be overridden by specifying the `_default_` type when
|
||||
creating a new index.
|
||||
|
||||
[float]
|
||||
=== Mapper settings
|
||||
|
||||
`index.mapper.dynamic` (_dynamic_)::
|
||||
|
||||
Dynamic creation of mappings for unmapped types can be completely
|
||||
disabled by setting `index.mapper.dynamic` to `false`.
|
||||
|
|
|
@ -6,8 +6,8 @@ are scored. Similarity is per field, meaning that via the mapping one
|
|||
can define a different similarity per field.
|
||||
|
||||
Configuring a custom similarity is considered an expert feature and the
|
||||
builtin similarities are most likely sufficient as is described in the
|
||||
<<mapping-core-types,mapping section>>
|
||||
builtin similarities are most likely sufficient as is described in
|
||||
<<similarity>>.
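If a custom similarity is nevertheless required, a minimal sketch looks like
this (index, type, field, and similarity names here are illustrative): the
similarity is declared in the index settings and then referenced from a field
mapping.

[source,js]
--------------------------------------------------
PUT my_index
{
  "settings": {
    "similarity": {
      "my_similarity": {
        "type": "DFR",
        "basic_model": "g",
        "after_effect": "l",
        "normalization": "h2",
        "normalization.h2.c": "3.0"
      }
    }
  },
  "mappings": {
    "my_type": {
      "properties": {
        "title": { "type": "string", "similarity": "my_similarity" }
      }
    }
  }
}
--------------------------------------------------
// AUTOSENSE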
|
||||
|
||||
[float]
|
||||
[[configuration]]
|
||||
|
@ -90,7 +90,7 @@ Type name: `BM25`
|
|||
==== DFR similarity
|
||||
|
||||
Similarity that implements the
|
||||
http://lucene.apache.org/core/4_1_0/core/org/apache/lucene/search/similarities/DFRSimilarity.html[divergence
|
||||
http://lucene.apache.org/core/5_2_1/core/org/apache/lucene/search/similarities/DFRSimilarity.html[divergence
|
||||
from randomness] framework. This similarity has the following options:
|
||||
|
||||
[horizontal]
|
||||
|
@ -111,7 +111,7 @@ Type name: `DFR`
|
|||
[[ib]]
|
||||
==== IB similarity.
|
||||
|
||||
http://lucene.apache.org/core/4_1_0/core/org/apache/lucene/search/similarities/IBSimilarity.html[Information
|
||||
http://lucene.apache.org/core/5_2_1/core/org/apache/lucene/search/similarities/IBSimilarity.html[Information
|
||||
based model] . This similarity has the following options:
|
||||
|
||||
[horizontal]
|
||||
|
@ -125,7 +125,7 @@ Type name: `IB`
|
|||
[[lm_dirichlet]]
|
||||
==== LM Dirichlet similarity.
|
||||
|
||||
http://lucene.apache.org/core/4_7_1/core/org/apache/lucene/search/similarities/LMDirichletSimilarity.html[LM
|
||||
http://lucene.apache.org/core/5_2_1/core/org/apache/lucene/search/similarities/LMDirichletSimilarity.html[LM
|
||||
Dirichlet similarity] . This similarity has the following options:
|
||||
|
||||
[horizontal]
|
||||
|
@ -137,7 +137,7 @@ Type name: `LMDirichlet`
|
|||
[[lm_jelinek_mercer]]
|
||||
==== LM Jelinek Mercer similarity.
|
||||
|
||||
http://lucene.apache.org/core/4_7_1/core/org/apache/lucene/search/similarities/LMJelinekMercerSimilarity.html[LM
|
||||
http://lucene.apache.org/core/5_2_1/core/org/apache/lucene/search/similarities/LMJelinekMercerSimilarity.html[LM
|
||||
Jelinek Mercer similarity] . This similarity has the following options:
|
||||
|
||||
[horizontal]
|
||||
|
|
|
@ -3,76 +3,173 @@
|
|||
|
||||
[partintro]
|
||||
--
|
||||
Mapping is the process of defining how a document should be mapped to
|
||||
the Search Engine, including its searchable characteristics such as
|
||||
which fields are searchable and if/how they are tokenized. In
|
||||
Elasticsearch, an index may store documents of different "mapping
|
||||
types". Elasticsearch allows one to associate multiple mapping
|
||||
definitions for each mapping type.
|
||||
|
||||
Explicit mapping is defined on an index/type level. By default, there
|
||||
isn't a need to define an explicit mapping, since one is automatically
|
||||
created and registered when a new type or new field is introduced (with
|
||||
no performance overhead) and have sensible defaults. Only when the
|
||||
defaults need to be overridden must a mapping definition be provided.
|
||||
Mapping is the process of defining how a document, and the fields it contains,
|
||||
are stored and indexed. For instance, use mappings to define:
|
||||
|
||||
* which string fields should be treated as full text fields.
|
||||
* which fields contain numbers, dates, or geolocations.
|
||||
* whether the values of all fields in the document should be
|
||||
indexed into the catch-all <<mapping-all-field,`_all`>> field.
|
||||
* the <<mapping-date-format,format>> of date values.
|
||||
* custom rules to control the mapping for
|
||||
<<dynamic-mapping,dynamically added fields>>.
|
||||
|
||||
[float]
|
||||
[[all-mapping-types]]
|
||||
=== Mapping Types
|
||||
[[mapping-type]]
|
||||
== Mapping Types
|
||||
|
||||
Mapping types are a way to divide the documents in an index into logical
|
||||
groups. Think of it as tables in a database. Though there is separation
|
||||
between types, it's not a full separation (all end up as a document
|
||||
within the same Lucene index).
|
||||
Each index has one or more _mapping types_, which are used to divide the
|
||||
documents in an index into logical groups. User documents might be stored in a
|
||||
`user` type, and blog posts in a `blogpost` type.
|
||||
|
||||
Field names with the same name across types are highly recommended to
|
||||
have the same type and same mapping characteristics (analysis settings
|
||||
for example). There is an effort to allow to explicitly "choose" which
|
||||
field to use by using type prefix (`my_type.my_field`), but it's not
|
||||
complete, and there are places where it will never work (like
|
||||
aggregations on the field).
|
||||
Each mapping type has:
|
||||
|
||||
<<mapping-fields,Meta-fields>>::
|
||||
|
||||
Meta-fields are used to customize how the metadata associated with a document is
|
||||
treated. Examples of meta-fields include the document's
|
||||
<<mapping-index-field,`_index`>>, <<mapping-type-field,`_type`>>,
|
||||
<<mapping-id-field,`_id`>>, and <<mapping-source-field,`_source`>> fields.
|
||||
|
||||
<<mapping-types,Fields>> or _properties_::
|
||||
|
||||
Each mapping type contains a list of fields or `properties` pertinent to that
|
||||
type. A `user` type might contain `title`, `name`, and `age` fields, while a
|
||||
`blogpost` type might contain `title`, `body`, `user_id` and `created` fields.
|
||||
Fields with the same name in different mapping types in the same index
|
||||
<<field-conflicts,must have the same mapping>>.
|
||||
|
||||
In practice though, this restriction is almost never an issue. The field
|
||||
name usually ends up being a good indication to its "typeness" (e.g.
|
||||
"first_name" will always be a string). Note also, that this does not
|
||||
apply to the cross index case.
|
||||
|
||||
[float]
|
||||
[[mapping-api]]
|
||||
=== Mapping API
|
||||
== Field datatypes
|
||||
|
||||
To create a mapping, you will need the <<indices-put-mapping,Put Mapping
|
||||
API>>, or you can add multiple mappings when you <<indices-create-index,create an
|
||||
index>>.
|
||||
Each field has a data `type` which can be:
|
||||
|
||||
* a simple type like <<string,`string`>>, <<date,`date`>>, <<number,`long`>>,
|
||||
<<number,`double`>>, <<boolean,`boolean`>> or <<ip,`ip`>>.
|
||||
* a type which supports the hierarchical nature of JSON such as
|
||||
<<object,`object`>> or <<nested,`nested`>>.
|
||||
* or a specialised type like <<geo-point,`geo_point`>>,
|
||||
<<geo-shape,`geo_shape`>>, or <<search-suggesters-completion,`completion`>>.
|
||||
|
||||
It is often useful to index the same field in different ways for different
|
||||
purposes. For instance, a `string` field could be <<mapping-index,indexed>> as
|
||||
an `analyzed` field for full-text search, and as a `not_analyzed` field for
|
||||
sorting or aggregations. Alternatively, you could index a string field with
|
||||
the <<analysis-standard-analyzer,`standard` analyzer>>, the
|
||||
<<english-analyzer,`english`>> analyzer, and the
|
||||
<<french-analyzer,`french` analyzer>>.
|
||||
|
||||
This is the purpose of _multi-fields_. Most datatypes support multi-fields
|
||||
via the <<multi-fields>> parameter.
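A minimal multi-field sketch (index, type, and field names are illustrative):
the main `city` field is analyzed for full-text search, while `city.raw` is
kept `not_analyzed` for sorting and aggregations.

[source,js]
--------------------------------------------------
PUT my_index
{
  "mappings": {
    "my_type": {
      "properties": {
        "city": {
          "type": "string",
          "fields": {
            "raw": { "type": "string", "index": "not_analyzed" }
          }
        }
      }
    }
  }
}
--------------------------------------------------
// AUTOSENSE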
|
||||
|
||||
[float]
|
||||
[[mapping-settings]]
|
||||
=== Global Settings
|
||||
== Dynamic mapping
|
||||
|
||||
Fields and mapping types do not need to be defined before being used. Thanks
|
||||
to _dynamic mapping_, new mapping types and new field names will be added
|
||||
automatically, just by indexing a document. New fields can be added both to
|
||||
the top-level mapping type, and to inner <<object,`object`>> and
|
||||
<<nested,`nested`>> fields.
|
||||
|
||||
The
|
||||
<<dynamic-mapping,dynamic mapping>> rules can be configured to
|
||||
customise the mapping that is used for new types and new fields.
|
||||
|
||||
[float]
|
||||
== Explicit mappings
|
||||
|
||||
You know more about your data than Elasticsearch can guess, so while dynamic
|
||||
mapping can be useful to get started, at some point you will want to specify
|
||||
your own explicit mappings.
|
||||
|
||||
You can create mapping types and field mappings when you
|
||||
<<indices-create-index,create an index>>, and you can add mapping types and
|
||||
fields to an existing index with the <<indices-put-mapping,PUT mapping API>>.
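For instance, a new type could be added to an existing index like this (a
sketch with illustrative index, type, and field names; see the PUT mapping API
docs for the full syntax):

[source,js]
--------------------------------------------------
PUT my_index/_mapping/new_type
{
  "properties": {
    "email": { "type": "string", "index": "not_analyzed" }
  }
}
--------------------------------------------------
// AUTOSENSE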
|
||||
|
||||
[float]
|
||||
== Updating existing mappings
|
||||
|
||||
Other than where documented, *existing type and field mappings cannot be
|
||||
updated*. Changing the mapping would mean invalidating already indexed
|
||||
documents. Instead, you should create a new index with the correct mappings
|
||||
and reindex your data into that index.
|
||||
|
||||
[[field-conflicts]]
|
||||
[float]
|
||||
== Fields are shared across mapping types
|
||||
|
||||
Mapping types are used to group fields, but the fields in each mapping type
|
||||
are not independent of each other. Fields with:
|
||||
|
||||
* the _same name_
|
||||
* in the _same index_
|
||||
* in _different mapping types_
|
||||
* map to the _same field_ internally,
|
||||
* and *must have the same mapping*.
|
||||
|
||||
If a `title` field exists in both the `user` and `blogpost` mapping types, the
|
||||
`title` fields must have exactly the same mapping in each type. The only
|
||||
exceptions to this rule are the <<copy-to>>, <<dynamic>>, <<enabled>>,
|
||||
<<ignore-above>>, <<include-in-all>>, and <<properties>> parameters, which may
|
||||
have different settings per field.
|
||||
|
||||
Usually, fields with the same name also contain the same type of data, so
|
||||
having the same mapping is not a problem. When conflicts do arise, these can
|
||||
be solved by choosing more descriptive names, such as `user_title` and
|
||||
`blog_title`.
|
||||
|
||||
[float]
|
||||
== Example mapping
|
||||
|
||||
A mapping for the example described above could be specified when creating the
|
||||
index, as follows:
|
||||
|
||||
[source,js]
|
||||
---------------------------------------
|
||||
PUT my_index <1>
|
||||
{
|
||||
"mappings": {
|
||||
"user": { <2>
|
||||
"_all": { "enabled": false }, <3>
|
||||
"properties": { <4>
|
||||
"title": { "type": "string" }, <5>
|
||||
"name": { "type": "string" }, <5>
|
||||
"age": { "type": "integer" } <5>
|
||||
}
|
||||
},
|
||||
"blogpost": { <2>
|
||||
"properties": { <4>
|
||||
"title": { "type": "string" }, <5>
|
||||
"body": { "type": "string" }, <5>
|
||||
"user_id": {
|
||||
"type": "string", <5>
|
||||
"index": "not_analyzed"
|
||||
},
|
||||
"created": {
|
||||
"type": "date", <5>
|
||||
"format": "strict_date_optional_time||epoch_millis"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
---------------------------------------
|
||||
// AUTOSENSE
|
||||
<1> Create an index called `my_index`.
|
||||
<2> Add mapping types called `user` and `blogpost`.
|
||||
<3> Disable the `_all` <<mapping-fields,meta field>> for the `user` mapping type.
|
||||
<4> Specify fields or _properties_ in each mapping type.
|
||||
<5> Specify the data `type` and mapping for each field.
|
||||
|
||||
The `index.mapping.ignore_malformed` global setting can be set on the
index level to ignore malformed content globally across all mapping types
(for example, trying to index a text string value into a numeric field).
|
||||
|
||||
The `index.mapping.coerce` global setting can be set on the index level to
coerce numeric content globally across all mapping types. It defaults to
`true`: the coercions attempted are to convert strings containing numbers
into numeric types, and to convert numeric values with fractions into
integer/short/long values by dropping the fraction part. When a permitted
conversion fails, the value is considered malformed and the
`ignore_malformed` setting dictates what happens next.
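As a sketch (the index name and values are illustrative, assuming the settings
described above), both settings can be supplied when creating an index:

[source,js]
--------------------------------------------------
PUT my_index
{
  "settings": {
    "index.mapping.ignore_malformed": true, <1>
    "index.mapping.coerce": false           <2>
  }
}
--------------------------------------------------
// AUTOSENSE
<1> Malformed values are ignored instead of causing the document to be rejected.
<2> No numeric coercion is attempted for any mapping type in this index.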
|
||||
--
|
||||
|
||||
include::mapping/fields.asciidoc[]
|
||||
|
||||
include::mapping/types.asciidoc[]
|
||||
|
||||
include::mapping/date-format.asciidoc[]
|
||||
include::mapping/fields.asciidoc[]
|
||||
|
||||
include::mapping/fielddata_formats.asciidoc[]
|
||||
include::mapping/params.asciidoc[]
|
||||
|
||||
include::mapping/dynamic-mapping.asciidoc[]
|
||||
|
||||
include::mapping/meta.asciidoc[]
|
||||
|
||||
include::mapping/transform.asciidoc[]
|
||||
|
|
|
@ -1,238 +0,0 @@
|
|||
[[mapping-date-format]]
|
||||
== Date Format
|
||||
|
||||
In JSON documents, dates are represented as strings. Elasticsearch uses a set
|
||||
of pre-configured format to recognize and convert those, but you can change the
|
||||
defaults by specifying the `format` option when defining a `date` type, or by
|
||||
specifying `dynamic_date_formats` in the `root object` mapping (which will
|
||||
be used unless explicitly overridden by a `date` type). There are built in
|
||||
formats supported, as well as complete custom one.
|
||||
|
||||
The parsing of dates uses http://www.joda.org/joda-time/[Joda]. The
|
||||
default date parsing used if no format is specified is
|
||||
http://www.joda.org/joda-time/apidocs/org/joda/time/format/ISODateTimeFormat.html#dateOptionalTimeParser--[ISODateTimeFormat.dateOptionalTimeParser].
|
||||
|
||||
An extension to the format allow to define several formats using `||`
|
||||
separator. This allows to define less strict formats that can be used,
|
||||
for example, the `yyyy/MM/dd HH:mm:ss||yyyy/MM/dd` format will parse
|
||||
both `yyyy/MM/dd HH:mm:ss` and `yyyy/MM/dd`. The first format will also
|
||||
act as the one that converts back from milliseconds to a string
|
||||
representation.
|
||||
|
||||
[float]
|
||||
[[date-math]]
|
||||
=== Date Math
|
||||
|
||||
The `date` type supports using date math expression when using it in a
|
||||
query/filter (mainly makes sense in `range` query/filter).
|
||||
|
||||
The expression starts with an "anchor" date, which can be either `now`
|
||||
or a date string (in the applicable format) ending with `||`. It can
|
||||
then follow by a math expression, supporting `+`, `-` and `/`
|
||||
(rounding). The units supported are `y` (year), `M` (month), `w` (week),
|
||||
`d` (day), `h` (hour), `m` (minute), and `s` (second).
|
||||
|
||||
Here are some samples: `now+1h`, `now+1h+1m`, `now+1h/d`,
|
||||
`2012-01-01||+1M/d`.
|
||||
|
||||
When doing `range` type searches with rounding, the value parsed
|
||||
depends on whether the end of the range is inclusive or exclusive, and
|
||||
whether the beginning or end of the range. Rounding up moves to the
|
||||
last millisecond of the rounding scope, and rounding down to the
|
||||
first millisecond of the rounding scope. The semantics work as follows:
|
||||
* `gt` - round up, and use > that value (`2014-11-18||/M` becomes `2014-11-30T23:59:59.999`, ie excluding the entire month)
|
||||
* `gte` - round D down, and use >= that value (`2014-11-18||/M` becomes `2014-11-01`, ie including the entire month)
|
||||
* `lt` - round D down, and use < that value (`2014-11-18||/M` becomes `2014-11-01`, ie excluding the entire month)
|
||||
* `lte` - round D up, and use <= that value(`2014-11-18||/M` becomes `2014-11-30T23:59:59.999`, ie including the entire month)
|
||||
|
||||
[float]
|
||||
[[built-in]]
|
||||
=== Built In Formats
|
||||
|
||||
Most of the below dates have a `strict` companion dates, which means, that
|
||||
year, month and day parts of the week must have prepending zeros in order
|
||||
to be valid. This means, that a date like `5/11/1` would not be valid, but
|
||||
you would need to specify the full date, which would be `2005/11/01` in this
|
||||
example. So instead of `date_optional_time` you would need to specify
|
||||
`strict_date_optional_time`.
|
||||
|
||||
The following tables lists all the defaults ISO formats supported:
|
||||
|
||||
[cols="<,<",options="header",]
|
||||
|=======================================================================
|
||||
|Name |Description
|
||||
|`basic_date`|A basic formatter for a full date as four digit year, two
|
||||
digit month of year, and two digit day of month (yyyyMMdd).
|
||||
|
||||
|`basic_date_time`|A basic formatter that combines a basic date and time,
|
||||
separated by a 'T' (yyyyMMdd'T'HHmmss.SSSZ).
|
||||
|
||||
|`basic_date_time_no_millis`|A basic formatter that combines a basic date
|
||||
and time without millis, separated by a 'T' (yyyyMMdd'T'HHmmssZ).
|
||||
|
||||
|`basic_ordinal_date`|A formatter for a full ordinal date, using a four
|
||||
digit year and three digit dayOfYear (yyyyDDD).
|
||||
|
||||
|`basic_ordinal_date_time`|A formatter for a full ordinal date and time,
|
||||
using a four digit year and three digit dayOfYear
|
||||
(yyyyDDD'T'HHmmss.SSSZ).
|
||||
|
||||
|`basic_ordinal_date_time_no_millis`|A formatter for a full ordinal date
|
||||
and time without millis, using a four digit year and three digit
|
||||
dayOfYear (yyyyDDD'T'HHmmssZ).
|
||||
|
||||
|`basic_time`|A basic formatter for a two digit hour of day, two digit
|
||||
minute of hour, two digit second of minute, three digit millis, and time
|
||||
zone offset (HHmmss.SSSZ).
|
||||
|
||||
|`basic_time_no_millis`|A basic formatter for a two digit hour of day,
|
||||
two digit minute of hour, two digit second of minute, and time zone
|
||||
offset (HHmmssZ).
|
||||
|
||||
|`basic_t_time`|A basic formatter for a two digit hour of day, two digit
|
||||
minute of hour, two digit second of minute, three digit millis, and time
|
||||
zone off set prefixed by 'T' ('T'HHmmss.SSSZ).
|
||||
|
||||
|`basic_t_time_no_millis`|A basic formatter for a two digit hour of day,
|
||||
two digit minute of hour, two digit second of minute, and time zone
|
||||
offset prefixed by 'T' ('T'HHmmssZ).
|
||||
|
||||
|`basic_week_date`|A basic formatter for a full date as four digit
|
||||
weekyear, two digit week of weekyear, and one digit day of week
|
||||
(xxxx'W'wwe). `strict_basic_week_date` is supported.
|
||||
|
||||
|`basic_week_date_time`|A basic formatter that combines a basic weekyear
|
||||
date and time, separated by a 'T' (xxxx'W'wwe'T'HHmmss.SSSZ).
|
||||
`strict_basic_week_date_time` is supported.
|
||||
|
||||
|`basic_week_date_time_no_millis`|A basic formatter that combines a basic
|
||||
weekyear date and time without millis, separated by a 'T'
|
||||
(xxxx'W'wwe'T'HHmmssZ). `strict_week_date_time` is supported.
|
||||
|
||||
|`date`|A formatter for a full date as four digit year, two digit month
|
||||
of year, and two digit day of month (yyyy-MM-dd). `strict_date` is supported.
|
||||
|
||||
|`date_hour`|A formatter that combines a full date and two digit hour of
|
||||
day. `strict_date_hour` is supported.
|
||||
|
||||
|
||||
|`date_hour_minute`|A formatter that combines a full date, two digit hour
|
||||
of day, and two digit minute of hour. `strict_date_hour_minute` is supported.
|
||||
|
||||
|`date_hour_minute_second`|A formatter that combines a full date, two
|
||||
digit hour of day, two digit minute of hour, and two digit second of
|
||||
minute. `strict_date_hour_minute_second` is supported.
|
||||
|
||||
|`date_hour_minute_second_fraction`|A formatter that combines a full
|
||||
date, two digit hour of day, two digit minute of hour, two digit second
|
||||
of minute, and three digit fraction of second
|
||||
(yyyy-MM-dd'T'HH:mm:ss.SSS). `strict_date_hour_minute_second_fraction` is supported.
|
||||
|
||||
|`date_hour_minute_second_millis`|A formatter that combines a full date,
|
||||
two digit hour of day, two digit minute of hour, two digit second of
|
||||
minute, and three digit fraction of second (yyyy-MM-dd'T'HH:mm:ss.SSS).
|
||||
`strict_date_hour_minute_second_millis` is supported.
|
||||
|
||||
|`date_optional_time`|a generic ISO datetime parser where the date is
|
||||
mandatory and the time is optional. `strict_date_optional_time` is supported.
|
||||
|
||||
|`date_time`|A formatter that combines a full date and time, separated by
|
||||
a 'T' (yyyy-MM-dd'T'HH:mm:ss.SSSZZ). `strict_date_time` is supported.
|
||||
|
||||
|`date_time_no_millis`|A formatter that combines a full date and time
|
||||
without millis, separated by a 'T' (yyyy-MM-dd'T'HH:mm:ssZZ).
|
||||
`strict_date_time_no_millis` is supported.
|
||||
|
||||
|`hour`|A formatter for a two digit hour of day. `strict_hour` is supported.
|
||||
|
||||
|`hour_minute`|A formatter for a two digit hour of day and two digit
|
||||
minute of hour. `strict_hour_minute` is supported.
|
||||
|
||||
|`hour_minute_second`|A formatter for a two digit hour of day, two digit
|
||||
minute of hour, and two digit second of minute.
|
||||
`strict_hour_minute_second` is supported.
|
||||
|
||||
|`hour_minute_second_fraction`|A formatter for a two digit hour of day,
|
||||
two digit minute of hour, two digit second of minute, and three digit
|
||||
fraction of second (HH:mm:ss.SSS).
|
||||
`strict_hour_minute_second_fraction` is supported.
|
||||
|
||||
|`hour_minute_second_millis`|A formatter for a two digit hour of day, two
|
||||
digit minute of hour, two digit second of minute, and three digit
|
||||
fraction of second (HH:mm:ss.SSS).
|
||||
`strict_hour_minute_second_millis` is supported.
|
||||
|
||||
|`ordinal_date`|A formatter for a full ordinal date, using a four digit
|
||||
year and three digit dayOfYear (yyyy-DDD). `strict_ordinal_date` is supported.
|
||||
|
||||
|`ordinal_date_time`|A formatter for a full ordinal date and time, using
|
||||
a four digit year and three digit dayOfYear (yyyy-DDD'T'HH:mm:ss.SSSZZ).
|
||||
`strict_ordinal_date_time` is supported.
|
||||
|
||||
|`ordinal_date_time_no_millis`|A formatter for a full ordinal date and
|
||||
time without millis, using a four digit year and three digit dayOfYear
|
||||
(yyyy-DDD'T'HH:mm:ssZZ).
|
||||
`strict_ordinal_date_time_no_millis` is supported.
|
||||
|
||||
|`time`|A formatter for a two digit hour of day, two digit minute of
|
||||
hour, two digit second of minute, three digit fraction of second, and
|
||||
time zone offset (HH:mm:ss.SSSZZ). `strict_time` is supported.
|
||||
|
||||
|`time_no_millis`|A formatter for a two digit hour of day, two digit
|
||||
minute of hour, two digit second of minute, and time zone offset
|
||||
(HH:mm:ssZZ). `strict_time_no_millis` is supported.
|
||||
|
||||
|`t_time`|A formatter for a two digit hour of day, two digit minute of
|
||||
hour, two digit second of minute, three digit fraction of second, and
|
||||
time zone offset prefixed by 'T' ('T'HH:mm:ss.SSSZZ).
|
||||
`strict_t_time` is supported.
|
||||
|
||||
|`t_time_no_millis`|A formatter for a two digit hour of day, two digit
|
||||
minute of hour, two digit second of minute, and time zone offset
|
||||
prefixed by 'T' ('T'HH:mm:ssZZ). `strict_t_time_no_millis` is supported.
|
||||
|
||||
|`week_date`|A formatter for a full date as four digit weekyear, two
|
||||
digit week of weekyear, and one digit day of week (xxxx-'W'ww-e).
|
||||
`strict_week_date` is supported.
|
||||
|
||||
|`week_date_time`|A formatter that combines a full weekyear date and
|
||||
time, separated by a 'T' (xxxx-'W'ww-e'T'HH:mm:ss.SSSZZ).
|
||||
`strict_week_date_time` is supported.
|
||||
|
||||
|`week_date_time_no_millis`|A formatter that combines a full weekyear date
|
||||
and time without millis, separated by a 'T' (xxxx-'W'ww-e'T'HH:mm:ssZZ).
|
||||
`strict_week_date_time` is supported.
|
||||
|
||||
|`weekyear`|A formatter for a four digit weekyear. `strict_week_year` is supported.
|
||||
|
||||
|`weekyear_week`|A formatter for a four digit weekyear and two digit week
|
||||
of weekyear. `strict_weekyear_week` is supported.
|
||||
|
||||
|`weekyear_week_day`|A formatter for a four digit weekyear, two digit week
|
||||
of weekyear, and one digit day of week. `strict_weekyear_week_day` is supported.
|
||||
|
||||
|`year`|A formatter for a four digit year. `strict_year` is supported.
|
||||
|
||||
|`year_month`|A formatter for a four digit year and two digit month of
|
||||
year. `strict_year_month` is supported.
|
||||
|
||||
|`year_month_day`|A formatter for a four digit year, two digit month of
|
||||
year, and two digit day of month. `strict_year_month_day` is supported.
|
||||
|
||||
|`epoch_second`|A formatter for the number of seconds since the epoch.
|
||||
Note, that this timestamp allows a max length of 10 chars, so dates
|
||||
older than 1653 and 2286 are not supported. You should use a different
|
||||
date formatter in that case.
|
||||
|
||||
|`epoch_millis`|A formatter for the number of milliseconds since the epoch.
|
||||
Note, that this timestamp allows a max length of 13 chars, so dates
|
||||
older than 1653 and 2286 are not supported. You should use a different
|
||||
date formatter in that case.
|
||||
|=======================================================================
|
||||
|
||||
[float]
|
||||
[[custom]]
|
||||
=== Custom Format
|
||||
|
||||
Allows for a completely customizable date format explained
|
||||
http://www.joda.org/joda-time/apidocs/org/joda/time/format/DateTimeFormat.html[here].
|
|
@ -1,73 +1,67 @@
|
|||
[[mapping-dynamic-mapping]]
|
||||
[[dynamic-mapping]]
|
||||
== Dynamic Mapping
|
||||
|
||||
Default mappings allow generic mapping definitions to be automatically applied
|
||||
to types that do not have mappings predefined. This is mainly done
|
||||
thanks to the fact that the
|
||||
<<mapping-object-type,object mapping>> and
|
||||
namely the <<mapping-root-object-type,root
|
||||
object mapping>> allow for schema-less dynamic addition of unmapped
|
||||
fields.
|
||||
|
||||
The default mapping definition is a plain mapping definition that is
|
||||
embedded within the distribution:
|
||||
One of the most important features of Elasticsearch is that it tries to get
|
||||
out of your way and let you start exploring your data as quickly as possible.
|
||||
To index a document, you don't have to first create an index, define a mapping
|
||||
type, and define your fields -- you can just index a document and the index,
|
||||
type, and fields will spring to life automatically:
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
{
|
||||
"_default_" : {
|
||||
}
|
||||
}
|
||||
PUT data/counters/1 <1>
|
||||
{ "count": 5 }
|
||||
--------------------------------------------------
|
||||
// AUTOSENSE
|
||||
<1> Creates the `data` index, the `counters` mapping type, and a field
|
||||
called `count` with datatype `long`.
|
||||
|
||||
Pretty short, isn't it? Basically, everything is defaulted, especially the
|
||||
dynamic nature of the root object mapping. The default mapping can be
|
||||
overridden by specifying the `_default_` type when creating a new index.
|
||||
The automatic detection and addition of new types and fields is called
|
||||
_dynamic mapping_. The dynamic mapping rules can be customised to suit your
|
||||
purposes with:
|
||||
|
||||
The dynamic creation of mappings for unmapped types can be completely
|
||||
disabled by setting `index.mapper.dynamic` to `false`.
|
||||
<<default-mapping,`_default_` mapping>>::
|
||||
|
||||
The dynamic creation of fields within a type can be completely
|
||||
disabled by setting the `dynamic` property of the type to `strict`.
|
||||
Configure the base mapping to be used for new mapping types.
|
||||
|
||||
Here is a <<indices-put-mapping,Put Mapping>> example that
|
||||
disables dynamic field creation for a `tweet`:
|
||||
<<dynamic-field-mapping,Dynamic field mappings>>::
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
$ curl -XPUT 'http://localhost:9200/twitter/_mapping/tweet' -d '
|
||||
{
|
||||
"tweet" : {
|
||||
"dynamic": "strict",
|
||||
"properties" : {
|
||||
"message" : {"type" : "string", "store" : true }
|
||||
}
|
||||
}
|
||||
}
|
||||
'
|
||||
--------------------------------------------------
|
||||
The rules governing dynamic field detection.
|
||||
|
||||
Here is how we can change the default
|
||||
<<mapping-date-format,date_formats>> used in the
|
||||
root and inner object types:
|
||||
<<dynamic-templates,Dynamic templates>>::
|
||||
|
||||
Custom rules to configure the mapping for dynamically added fields.
|
||||
|
||||
TIP: <<indices-templates,Index templates>> allow you to configure the default
|
||||
mappings, settings, aliases, and warmers for new indices, whether created
|
||||
automatically or explicitly.
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
{
|
||||
"_default_" : {
|
||||
"dynamic_date_formats" : ["yyyy-MM-dd", "dd-MM-yyyy", "date_optional_time"]
|
||||
}
|
||||
}
|
||||
--------------------------------------------------
|
||||
|
||||
[float]
|
||||
=== Unmapped fields in queries
|
||||
=== Disabling automatic type creation
|
||||
|
||||
Queries and filters can refer to fields that don't exist in a mapping. Whether this
|
||||
is allowed is controlled by the `index.query.parse.allow_unmapped_fields` setting.
|
||||
This setting defaults to `true`. Setting it to `false` will disallow the usage of
|
||||
unmapped fields in queries.
|
||||
Automatic type creation can be disabled by setting the `index.mapper.dynamic`
|
||||
setting to `false`, either by setting the default value in the
|
||||
`config/elasticsearch.yml` file, or per-index as an index setting:
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
PUT /_settings <1>
|
||||
{
|
||||
"index.mapper.dynamic":false
|
||||
}
|
||||
--------------------------------------------------
|
||||
// AUTOSENSE
|
||||
<1> Disable automatic type creation for all indices.
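The per-index variant (with an illustrative index name) sets the same property
in the index settings instead:

[source,js]
--------------------------------------------------
PUT my_index
{
  "settings": {
    "index.mapper.dynamic": false <1>
  }
}
--------------------------------------------------
// AUTOSENSE
<1> Disable automatic type creation for this index only.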
|
||||
|
||||
Regardless of the value of this setting, types can still be added explicitly
|
||||
when <<indices-create-index,creating an index>> or with the
|
||||
<<indices-put-mapping,PUT mapping>> API.
|
||||
|
||||
|
||||
include::dynamic/default-mapping.asciidoc[]
|
||||
|
||||
include::dynamic/field-mapping.asciidoc[]
|
||||
|
||||
include::dynamic/templates.asciidoc[]
|
||||
|
||||
When registering a new <<search-percolate,percolator query>> or creating
|
||||
a <<filtered,filtered alias>>, the `index.query.parse.allow_unmapped_fields` setting
|
||||
is forcibly overridden to disallow unmapped fields.
|
|
@ -0,0 +1,82 @@
|
|||
[[default-mapping]]
|
||||
=== `_default_` mapping
|
||||
|
||||
The default mapping, which will be used as the base mapping for any new
|
||||
mapping types, can be customised by adding a mapping type with the name
|
||||
`_default_` to an index, either when
|
||||
<<indices-create-index,creating the index>> or later on with the
|
||||
<<indices-put-mapping,PUT mapping>> API.
|
||||
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
PUT my_index
|
||||
{
|
||||
"mappings": {
|
||||
"_default_": { <1>
|
||||
"_all": {
|
||||
"enabled": false
|
||||
}
|
||||
},
|
||||
"user": {}, <2>
|
||||
"blogpost": { <3>
|
||||
"_all": {
|
||||
"enabled": true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
--------------------------------------------------
|
||||
// AUTOSENSE
|
||||
<1> The `_default_` mapping defaults the <<mapping-all-field,`_all`>> field to disabled.
|
||||
<2> The `user` type inherits the settings from `_default_`.
|
||||
<3> The `blogpost` type overrides the defaults and enables the <<mapping-all-field,`_all`>> field.
|
||||
|
||||
While the `_default_` mapping can be updated after an index has been created,
|
||||
the new defaults will only affect mapping types that are created afterwards.
|
||||
|
||||
The `_default_` mapping can be used in conjunction with
|
||||
<<indices-templates,Index templates>> to control dynamically created types
|
||||
within automatically created indices:
|
||||
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
PUT _template/logging
|
||||
{
|
||||
"template": "logs-*", <1>
|
||||
"settings": { "number_of_shards": 1 }, <2>
|
||||
"mappings": {
|
||||
"_default_": {
|
||||
"_all": { <3>
|
||||
"enabled": false
|
||||
},
|
||||
"dynamic_templates": [
|
||||
{
|
||||
"strings": { <4>
|
||||
"match_mapping_type": "string",
|
||||
"mapping": {
|
||||
"type": "string",
|
||||
"fields": {
|
||||
"raw": {
|
||||
"type": "string",
|
||||
"index": "not_analyzed",
|
||||
"ignore_above": 256
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
PUT logs-2015.10.01/event/1
|
||||
{ "message": "error:16" }
|
||||
--------------------------------------------------
|
||||
// AUTOSENSE
|
||||
<1> The `logging` template will match any indices beginning with `logs-`.
|
||||
<2> Matching indices will be created with a single primary shard.
|
||||
<3> The `_all` field will be disabled by default for new type mappings.
|
||||
<4> String fields will be created with an `analyzed` main field, and a `not_analyzed` `.raw` field.
|
|
@ -0,0 +1,139 @@
|
|||
[[dynamic-field-mapping]]
|
||||
=== Dynamic field mapping
|
||||
|
||||
By default, when a previously unseen field is found in a document,
|
||||
Elasticsearch will add the new field to the type mapping. This behaviour can
|
||||
be disabled, both at the document and at the <<object,`object`>> level, by
|
||||
setting the <<dynamic,`dynamic`>> parameter to `false` or to `strict`.
|
||||
|
||||
Assuming `dynamic` field mapping is enabled, some simple rules are used to
|
||||
determine which datatype the field should have:
|
||||
|
||||
[horizontal]
|
||||
*JSON datatype*:: *Elasticsearch datatype*
|
||||
|
||||
`null`:: No field is added.
|
||||
`true` or `false`:: <<boolean,`boolean`>> field
|
||||
floating{nbsp}point{nbsp}number:: <<number,`double`>> field
|
||||
integer:: <<number,`long`>> field
|
||||
object:: <<object,`object`>> field
|
||||
array:: Depends on the first non-`null` value in the array.
|
||||
string:: Either a <<date,`date`>> field
|
||||
(if the value passes <<date-detection,date detection>>),
|
||||
a <<number,`double`>> or <<number,`long`>> field
|
||||
(if the value passes <<numeric-detection,numeric detection>>)
|
||||
or an <<mapping-index,`analyzed`>> <<string,`string`>> field.
|
||||
|
||||
These are the only <<mapping-types,field datatypes>> that are dynamically
|
||||
detected. All other datatypes must be mapped explicitly.
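For example (a hypothetical document, not part of this change), indexing the
following and then fetching the mapping shows each detection rule in action:

[source,js]
--------------------------------------------------
PUT my_index/my_type/1
{
  "count":   5,           <1>
  "price":   1.99,        <2>
  "on_sale": true,        <3>
  "note":    "some text"  <4>
}

GET my_index/_mapping
--------------------------------------------------
// AUTOSENSE
<1> Detected as a `long` field.
<2> Detected as a `double` field.
<3> Detected as a `boolean` field.
<4> Detected as an analyzed `string` field.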
|
||||
|
||||
Besides the options listed below, dynamic field mapping rules can be further
|
||||
customised with <<dynamic-templates,`dynamic_templates`>>.
|
||||
|
||||
[[date-detection]]
|
||||
==== Date detection
|
||||
|
||||
If `date_detection` is enabled (default), then new string fields are checked
|
||||
to see whether their contents match any of the date patterns specified in
|
||||
`dynamic_date_formats`. If a match is found, a new <<date,`date`>> field is
|
||||
added with the corresponding format.
|
||||
|
||||
The default value for `dynamic_date_formats` is:
|
||||
|
||||
[ <<strict-date-time,`"strict_date_optional_time"`>>, `"yyyy/MM/dd HH:mm:ss Z||yyyy/MM/dd Z"`]
|
||||
|
||||
For example:
|
||||
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
PUT my_index/my_type/1
|
||||
{
|
||||
"create_date": "2015/09/02"
|
||||
}
|
||||
|
||||
GET my_index/_mapping <1>
|
||||
--------------------------------------------------
|
||||
// AUTOSENSE
|
||||
<1> The `create_date` field has been added as a <<date,`date`>>
|
||||
field with the <<mapping-date-format,`format`>>: +
|
||||
`"yyyy/MM/dd HH:mm:ss Z||yyyy/MM/dd Z"`.
|
||||
|
||||
===== Disabling date detection
|
||||
|
||||
Dynamic date detection can be disabled by setting `date_detection` to `false`:
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
PUT my_index
|
||||
{
|
||||
"mappings": {
|
||||
"my_type": {
|
||||
"date_detection": false
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
PUT my_index/my_type/1 <1>
|
||||
{
|
||||
"create": "2015/09/02"
|
||||
}
|
||||
--------------------------------------------------
|
||||
// AUTOSENSE
|
||||
|
||||
<1> The `create` field has been added as a <<string,`string`>> field.
|
||||
|
||||
===== Customising detected date formats
|
||||
|
||||
Alternatively, the `dynamic_date_formats` can be customised to support your
|
||||
own <<mapping-date-format,date formats>>:
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
PUT my_index
|
||||
{
|
||||
"mappings": {
|
||||
"my_type": {
|
||||
"dynamic_date_formats": ["MM/dd/yyyy"]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
PUT my_index/my_type/1
|
||||
{
|
||||
"create_date": "09/25/2015"
|
||||
}
|
||||
--------------------------------------------------
|
||||
// AUTOSENSE
|
||||
|
||||
|
||||
[[numeric-detection]]
|
||||
==== Numeric detection
|
||||
|
||||
While JSON has support for native floating point and integer datatypes, some
|
||||
applications or languages may sometimes render numbers as strings. Usually the
|
||||
correct solution is to map these fields explicitly, but numeric detection
|
||||
(which is disabled by default) can be enabled to do this automatically:
|
||||
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
PUT my_index
|
||||
{
|
||||
"mappings": {
|
||||
"my_type": {
|
||||
"numeric_detection": true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
PUT my_index/my_type/1
|
||||
{
|
||||
"my_float": "1.0", <1>
|
||||
"my_integer": "1" <2>
|
||||
}
|
||||
--------------------------------------------------
|
||||
// AUTOSENSE
|
||||
<1> The `my_float` field is added as a <<number,`double`>> field.
|
||||
<2> The `my_integer` field is added as a <<number,`long`>> field.
|
||||
|
|
@ -0,0 +1,251 @@
|
|||
[[dynamic-templates]]
|
||||
=== Dynamic templates
|
||||
|
||||
Dynamic templates allow you to define custom mappings that can be applied to
|
||||
dynamically added fields based on:
|
||||
|
||||
* the <<dynamic-mapping,datatype>> detected by Elasticsearch, with <<match-mapping-type,`match_mapping_type`>>.
|
||||
* the name of the field, with <<match-unmatch,`match` and `unmatch`>> or <<match-pattern,`match_pattern`>>.
|
||||
* the full dotted path to the field, with <<path-match-unmatch,`path_match` and `path_unmatch`>>.
|
||||
|
||||
The original field name `{name}` and the detected datatype
|
||||
`{dynamic_type}` <<template-variables,template variables>> can be used in
|
||||
the mapping specification as placeholders.
|
||||
|
||||
IMPORTANT: Dynamic field mappings are only added when a field contains a
|
||||
concrete value -- not `null` or an empty array. This means that if the
|
||||
`null_value` option is used in a `dynamic_template`, it will only be applied
|
||||
after the first document with a concrete value for the field has been
|
||||
indexed.
|
||||
|
||||
Dynamic templates are specified as an array of named objects:
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
"dynamic_templates": [
|
||||
{
|
||||
"my_template_name": { <1>
|
||||
... match conditions ... <2>
|
||||
"mapping": { ... } <3>
|
||||
}
|
||||
},
|
||||
...
|
||||
]
|
||||
--------------------------------------------------
|
||||
<1> The template name can be any string value.
|
||||
<2> The match conditions can include any of: `match_mapping_type`, `match`, `match_pattern`, `unmatch`, `path_match`, `path_unmatch`.
|
||||
<3> The mapping that the matched field should use.
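As a sketch of the `{name}` and `{dynamic_type}` placeholders (illustrative
index, type, and template names; this example is not part of the change
itself), a template can, for instance, reuse the field name as the analyzer
for new string fields:

[source,js]
--------------------------------------------------
PUT my_index
{
  "mappings": {
    "my_type": {
      "dynamic_templates": [
        {
          "named_analyzers": {
            "match_mapping_type": "string",
            "match": "*",
            "mapping": {
              "type": "{dynamic_type}", <1>
              "analyzer": "{name}"      <2>
            }
          }
        }
      ]
    }
  }
}
--------------------------------------------------
// AUTOSENSE
<1> `{dynamic_type}` is replaced by the detected datatype, here `string`.
<2> `{name}` is replaced by the field name, so a field called `english` would use the built-in `english` analyzer.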
|
||||
|
||||
|
||||
Templates are processed in order -- the first matching template wins. New
|
||||
templates can be appended to the end of the list with the
|
||||
<<indices-put-mapping,PUT mapping>> API. If a new template has the same
|
||||
name as an existing template, it will replace the old version.
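
As a minimal sketch (assuming an existing `my_index` with a `my_type` mapping,
and a hypothetical template named `strings_not_analyzed`), appending a
template through the PUT mapping API might look like this:

[source,js]
--------------------------------------------------
PUT my_index/_mapping/my_type
{
  "dynamic_templates": [
    {
      "strings_not_analyzed": { <1>
        "match_mapping_type": "string",
        "mapping": {
          "type": "string",
          "index": "not_analyzed"
        }
      }
    }
  ]
}
--------------------------------------------------
// AUTOSENSE
<1> No template with this name exists yet, so it is appended to the list;
reusing an existing name would replace that template instead.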
|
||||
|
||||
[[match-mapping-type]]
|
||||
==== `match_mapping_type`
|
||||
|
||||
The `match_mapping_type` matches on the datatype detected by
|
||||
<<dynamic-field-mapping,dynamic field mapping>>, in other words, the datatype
|
||||
that Elasticsearch thinks the field should have. Only the following datatypes
|
||||
can be automatically detected: `boolean`, `date`, `double`, `long`, `object`,
|
||||
`string`. It also accepts `*` to match all datatypes.
|
||||
|
||||
For example, if we wanted to map all integer fields as `integer` instead of
|
||||
`long`, and all `string` fields as both `analyzed` and `not_analyzed`, we
|
||||
could use the following template:
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
PUT my_index
|
||||
{
|
||||
"mappings": {
|
||||
"my_type": {
|
||||
"dynamic_templates": [
|
||||
{
|
||||
"integers": {
|
||||
"match_mapping_type": "long",
|
||||
"mapping": {
|
||||
"type": "integer"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"strings": {
|
||||
"match_mapping_type": "string",
|
||||
"mapping": {
|
||||
"type": "string",
|
||||
"fields": {
|
||||
"raw": {
|
||||
"type": "string",
|
||||
"index": "not_analyzed",
|
||||
"ignore_above": 256
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
PUT my_index/my_type/1
|
||||
{
|
||||
"my_integer": 5, <1>
|
||||
"my_string": "Some string" <2>
|
||||
}
|
||||
|
||||
--------------------------------------------------
|
||||
// AUTOSENSE
|
||||
<1> The `my_integer` field is mapped as an `integer`.
|
||||
<2> The `my_string` field is mapped as an analyzed `string`, with a `not_analyzed` <<multi-fields,multi field>>.
|
||||
|
||||
|
||||
[[match-unmatch]]
|
||||
==== `match` and `unmatch`
|
||||
|
||||
The `match` parameter uses a pattern to match on the fieldname, while
|
||||
`unmatch` uses a pattern to exclude fields matched by `match`.
|
||||
|
||||
The following example matches all `string` fields whose name starts with
|
||||
`long_` (except for those which end with `_text`) and maps them as `long`
|
||||
fields:
|
||||
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
PUT my_index
|
||||
{
|
||||
"mappings": {
|
||||
"my_type": {
|
||||
"dynamic_templates": [
|
||||
{
|
||||
"longs_as_strings": {
|
||||
"match_mapping_type": "string",
|
||||
"match": "long_*",
|
||||
"unmatch": "*_text",
|
||||
"mapping": {
|
||||
"type": "long"
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
PUT my_index/my_type/1
|
||||
{
|
||||
"long_num": "5", <1>
|
||||
"long_text": "foo" <2>
|
||||
}
|
||||
--------------------------------------------------
|
||||
// AUTOSENSE
|
||||
<1> The `long_num` field is mapped as a `long`.
|
||||
<2> The `long_text` field uses the default `string` mapping.
|
||||
|
||||
[[match-pattern]]
|
||||
==== `match_pattern`
|
||||
|
||||
The `match_pattern` parameter adjusts the behaviour of the `match` parameter
so that it supports full Java regular expression matching on the field name
instead of simple wildcards, for instance:
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
"match_pattern": "^profit_\d+$"
|
||||
--------------------------------------------------
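
For illustration, here is a hedged sketch of a complete template using regex
matching (the field names `profit_2015` and `profit_notes` are hypothetical,
and the backslash is doubled so that the pattern remains a valid JSON string):

[source,js]
--------------------------------------------------
PUT my_index
{
  "mappings": {
    "my_type": {
      "dynamic_templates": [
        {
          "profits_as_doubles": {
            "match_pattern": "regex",
            "match": "^profit_\\d+$", <1>
            "mapping": {
              "type": "double"
            }
          }
        }
      ]
    }
  }
}

PUT my_index/my_type/1
{
  "profit_2015": 100,   <2>
  "profit_notes": "n/a" <3>
}
--------------------------------------------------
// AUTOSENSE
<1> Only field names made up of `profit_` followed by digits match the regular expression.
<2> The `profit_2015` field matches and is mapped as a `double`.
<3> The `profit_notes` field does not match and falls back to normal dynamic mapping.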
|
||||
|
||||
[[path-match-unmatch]]
|
||||
==== `path_match` and `path_unmatch`
|
||||
|
||||
The `path_match` and `path_unmatch` parameters work in the same way as `match`
|
||||
and `unmatch`, but operate on the full dotted path to the field, not just the
|
||||
final name, e.g. `some_object.*.some_field`.
|
||||
|
||||
This example copies the values of any fields in the `name` object to the
|
||||
top-level `full_name` field, except for the `middle` field:
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
PUT my_index
|
||||
{
|
||||
"mappings": {
|
||||
"my_type": {
|
||||
"dynamic_templates": [
|
||||
{
|
||||
"full_name": {
|
||||
"path_match": "name.*",
|
||||
"path_unmatch": "*.middle",
|
||||
"mapping": {
|
||||
"type": "string",
|
||||
"copy_to": "full_name"
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
PUT my_index/my_type/1
|
||||
{
|
||||
"name": {
|
||||
"first": "Alice",
|
||||
"middle": "Mary",
|
||||
"last": "White"
|
||||
}
|
||||
}
|
||||
--------------------------------------------------
|
||||
// AUTOSENSE
|
||||
|
||||
[[template-variables]]
|
||||
==== `{name}` and `{dynamic_type}`
|
||||
|
||||
The `{name}` and `{dynamic_type}` placeholders are replaced in the `mapping`
|
||||
with the field name and detected dynamic type. The following example sets all
|
||||
string fields to use an <<analyzer,`analyzer`>> with the same name as the
|
||||
field, and disables <<doc-values,`doc_values`>> for all non-string fields:
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
PUT my_index
|
||||
{
|
||||
"mappings": {
|
||||
"my_type": {
|
||||
"dynamic_templates": [
|
||||
{
|
||||
"named_analyzers": {
|
||||
"match_mapping_type": "string",
|
||||
"match": "*",
|
||||
"mapping": {
|
||||
"type": "string",
|
||||
"analyzer": "{name}"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"no_doc_values": {
|
||||
"match_mapping_type":"*",
|
||||
"mapping": {
|
||||
"type": "{dynamic_type}",
|
||||
"doc_values": false
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
PUT my_index/my_type/1
|
||||
{
|
||||
"english": "Some English text", <1>
|
||||
"count": 5 <2>
|
||||
}
|
||||
--------------------------------------------------
|
||||
// AUTOSENSE
|
||||
<1> The `english` field is mapped as a `string` field with the `english` analyzer.
|
||||
<2> The `count` field is mapped as a `long` field with `doc_values` disabled.
|
||||
|
|
@ -1,257 +0,0 @@
|
|||
[[fielddata-formats]]
|
||||
== Fielddata formats
|
||||
|
||||
The field data format controls how field data should be stored.
|
||||
|
||||
Depending on the field type, there might be several field data types
|
||||
available. In particular, string, geo-point and numeric types support the `doc_values`
|
||||
format which allows for computing the field data data-structures at indexing
|
||||
time and storing them on disk. Although it will make the index larger and may
|
||||
be slightly slower, this implementation will be more near-realtime-friendly
|
||||
and will require much less memory from the JVM than other implementations.
|
||||
|
||||
Here is an example of how to configure the `tag` field to use the `paged_bytes` field
|
||||
data format.
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
{
|
||||
"tag": {
|
||||
"type": "string",
|
||||
"fielddata": {
|
||||
"format": "paged_bytes"
|
||||
}
|
||||
}
|
||||
}
|
||||
--------------------------------------------------
|
||||
|
||||
It is possible to change the field data format (and the field data settings
|
||||
in general) on a live index by using the update mapping API.
|
||||
|
||||
[float]
|
||||
=== String field data types
|
||||
|
||||
`paged_bytes` (default on analyzed string fields)::
|
||||
Stores unique terms sequentially in a large buffer and maps documents to
|
||||
the indices of the terms they contain in this large buffer.
|
||||
|
||||
`doc_values` (default when index is set to `not_analyzed`)::
|
||||
Computes and stores field data data-structures on disk at indexing time.
|
||||
Lowers memory usage but only works on non-analyzed strings (`index`: `no` or
|
||||
`not_analyzed`).
|
||||
|
||||
[float]
|
||||
=== Numeric field data types
|
||||
|
||||
`array`::
|
||||
Stores field values in memory using arrays.
|
||||
|
||||
`doc_values` (default unless doc values are disabled)::
|
||||
Computes and stores field data data-structures on disk at indexing time.
|
||||
|
||||
[float]
|
||||
=== Geo point field data types
|
||||
|
||||
`array`::
|
||||
Stores latitudes and longitudes in arrays.
|
||||
|
||||
`doc_values` (default unless doc values are disabled)::
|
||||
Computes and stores field data data-structures on disk at indexing time.
|
||||
|
||||
[float]
|
||||
[[global-ordinals]]
|
||||
=== Global ordinals
|
||||
|
||||
Global ordinals is a data structure on top of field data that maintains an
incremental numbering for all the terms in field data in lexicographic order:
each term has a unique number, and the number of term 'A' is lower than the
number of term 'B' if 'A' sorts before 'B'. Global ordinals are only supported
on string fields.
|
||||
|
||||
Field data on string fields also has ordinals, which is a unique numbering for
all terms in a particular segment and field. Global ordinals just build on top
of this, by providing a mapping between the segment ordinals and the global
ordinals, the latter being unique across the entire shard.
|
||||
|
||||
Global ordinals can be beneficial to search features that already use segment
ordinals, such as the terms aggregator, by improving execution time. Often
these search features need to merge the per-segment ordinal results into a
cross-segment terms result. With global ordinals this mapping happens at field
data load time instead of during each query execution. Search features then
only need to resolve the actual term when building the (shard) response;
during execution the unique numbering that global ordinals provide is
sufficient, which improves the execution time.
|
||||
|
||||
Global ordinals for a specified field are tied to all the segments of a shard
(Lucene index), whereas field data for a specific field is tied to a single
segment. For this reason global ordinals need to be rebuilt in their entirety
whenever new segments become visible. This one-time cost would happen anyway
without global ordinals, but then it would happen for each search execution
instead!
|
||||
|
||||
The loading time of global ordinals depends on the number of terms in a field,
but in general it is low, since the source field data has already been loaded.
The memory overhead of global ordinals is small because it is very efficiently
compressed. Eager loading of global ordinals can move the loading time from
the first search request to the refresh itself.
|
||||
|
||||
[float]
|
||||
[[fielddata-loading]]
|
||||
=== Fielddata loading
|
||||
|
||||
By default, field data is loaded lazily, i.e. the first time that a query that
requires it is executed. However, this can make the first requests that
follow a merge operation quite slow since fielddata loading is a heavy
operation.
|
||||
|
||||
It is possible to force field data to be loaded and cached eagerly through the
|
||||
`loading` setting of fielddata:
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
{
|
||||
"category": {
|
||||
"type": "string",
|
||||
"fielddata": {
|
||||
"loading": "eager"
|
||||
}
|
||||
}
|
||||
}
|
||||
--------------------------------------------------
|
||||
|
||||
Global ordinals can also be eagerly loaded:
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
{
|
||||
"category": {
|
||||
"type": "string",
|
||||
"fielddata": {
|
||||
"loading": "eager_global_ordinals"
|
||||
}
|
||||
}
|
||||
}
|
||||
--------------------------------------------------
|
||||
|
||||
With the above setting both field data and global ordinals for a specific field
|
||||
are eagerly loaded.
|
||||
|
||||
[float]
|
||||
==== Disabling field data loading
|
||||
|
||||
Field data can take a lot of RAM so it makes sense to disable field data
|
||||
loading on the fields that don't need field data, for example those that are
|
||||
used for full-text search only. In order to disable field data loading, just
|
||||
change the field data format to `disabled`. When disabled, all requests that
|
||||
will try to load field data, e.g. when they include aggregations and/or sorting,
|
||||
will return an error.
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
{
|
||||
"text": {
|
||||
"type": "string",
|
||||
"fielddata": {
|
||||
"format": "disabled"
|
||||
}
|
||||
}
|
||||
}
|
||||
--------------------------------------------------
|
||||
|
||||
The `disabled` format is supported by all field types.
|
||||
|
||||
[float]
|
||||
[[field-data-filtering]]
|
||||
=== Filtering fielddata
|
||||
|
||||
It is possible to control which field values are loaded into memory,
|
||||
which is particularly useful for string fields. When specifying the
|
||||
<<mapping-core-types,mapping>> for a field, you
|
||||
can also specify a fielddata filter.
|
||||
|
||||
Fielddata filters can be changed using the
|
||||
<<indices-put-mapping,PUT mapping>>
|
||||
API. After changing the filters, use the
|
||||
<<indices-clearcache,Clear Cache>> API
|
||||
to reload the fielddata using the new filters.
|
||||
|
||||
[float]
|
||||
==== Filtering by frequency:
|
||||
|
||||
The frequency filter allows you to only load terms whose frequency falls
|
||||
between a `min` and `max` value, which can be expressed as an absolute
number (when the number is bigger than 1.0) or as a percentage
(e.g. `0.01` is `1%` and `1.0` is `100%`). Frequency is calculated
|
||||
*per segment*. Percentages are based on the number of docs which have a
|
||||
value for the field, as opposed to all docs in the segment.
|
||||
|
||||
Small segments can be excluded completely by specifying the minimum
|
||||
number of docs that the segment should contain with `min_segment_size`:
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
{
|
||||
"tag": {
|
||||
"type": "string",
|
||||
"fielddata": {
|
||||
"filter": {
|
||||
"frequency": {
|
||||
"min": 0.001,
|
||||
"max": 0.1,
|
||||
"min_segment_size": 500
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
--------------------------------------------------
|
||||
|
||||
[float]
|
||||
==== Filtering by regex
|
||||
|
||||
Terms can also be filtered by regular expression - only values which
|
||||
match the regular expression are loaded. Note: the regular expression is
|
||||
applied to each term in the field, not to the whole field value. For
|
||||
instance, to only load hashtags from a tweet, we can use a regular
|
||||
expression which matches terms beginning with `#`:
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
{
|
||||
"tweet": {
|
||||
"type": "string",
|
||||
"analyzer": "whitespace"
|
||||
"fielddata": {
|
||||
"filter": {
|
||||
"regex": {
|
||||
"pattern": "^#.*"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
--------------------------------------------------
|
||||
|
||||
[float]
|
||||
==== Combining filters
|
||||
|
||||
The `frequency` and `regex` filters can be combined:
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
{
|
||||
"tweet": {
|
||||
"type": "string",
|
||||
"analyzer": "whitespace"
|
||||
"fielddata": {
|
||||
"filter": {
|
||||
"regex": {
|
||||
"pattern": "^#.*",
|
||||
},
|
||||
"frequency": {
|
||||
"min": 0.001,
|
||||
"max": 0.1,
|
||||
"min_segment_size": 500
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
--------------------------------------------------
|
|
@ -5,7 +5,8 @@ Each document has metadata associated with it, such as the `_index`, mapping
|
|||
<<mapping-type-field,`_type`>>, and `_id` meta-fields. The behaviour of some of these meta-fields
|
||||
can be customised when a mapping type is created.
|
||||
|
||||
The meta-fields are:
|
||||
[float]
|
||||
=== Identity meta-fields
|
||||
|
||||
[horizontal]
|
||||
<<mapping-index-field,`_index`>>::
|
||||
|
@ -18,16 +19,26 @@ The meta-fields are:
|
|||
|
||||
<<mapping-type-field,`_type`>>::
|
||||
|
||||
The document's <<all-mapping-types,mapping type>>.
|
||||
The document's <<mapping-type,mapping type>>.
|
||||
|
||||
<<mapping-id-field,`_id`>>::
|
||||
|
||||
The document's ID.
|
||||
|
||||
[float]
|
||||
=== Document source meta-fields
|
||||
|
||||
<<mapping-source-field,`_source`>>::
|
||||
|
||||
The original JSON representing the body of the document.
|
||||
|
||||
<<mapping-size-field,`_size`>>::
|
||||
|
||||
The size of the `_source` field in bytes.
|
||||
|
||||
[float]
|
||||
=== Indexing meta-fields
|
||||
|
||||
<<mapping-all-field,`_all`>>::
|
||||
|
||||
A _catch-all_ field that indexes the values of all other fields.
|
||||
|
@ -36,18 +47,6 @@ The meta-fields are:
|
|||
|
||||
All fields in the document which contain non-null values.
|
||||
|
||||
<<mapping-parent-field,`_parent`>>::
|
||||
|
||||
Used to create a parent-child relationship between two mapping types.
|
||||
|
||||
<<mapping-routing-field,`_routing`>>::
|
||||
|
||||
A custom routing value which routes a document to a particular shard.
|
||||
|
||||
<<mapping-size-field,`_size`>>::
|
||||
|
||||
The size of the `_source` field in bytes.
|
||||
|
||||
<<mapping-timestamp-field,`_timestamp`>>::
|
||||
|
||||
A timestamp associated with the document, either specified manually or auto-generated.
|
||||
|
@ -56,27 +55,49 @@ The meta-fields are:
|
|||
|
||||
How long a document should live before it is automatically deleted.
|
||||
|
||||
include::fields/index-field.asciidoc[]
|
||||
[float]
|
||||
=== Routing meta-fields
|
||||
|
||||
include::fields/uid-field.asciidoc[]
|
||||
<<mapping-parent-field,`_parent`>>::
|
||||
|
||||
include::fields/type-field.asciidoc[]
|
||||
Used to create a parent-child relationship between two mapping types.
|
||||
|
||||
<<mapping-routing-field,`_routing`>>::
|
||||
|
||||
A custom routing value which routes a document to a particular shard.
|
||||
|
||||
[float]
|
||||
=== Other meta-field
|
||||
|
||||
<<mapping-meta-field,`_meta`>>::
|
||||
|
||||
Application specific metadata.
|
||||
|
||||
include::fields/id-field.asciidoc[]
|
||||
|
||||
include::fields/source-field.asciidoc[]
|
||||
|
||||
include::fields/all-field.asciidoc[]
|
||||
|
||||
include::fields/field-names-field.asciidoc[]
|
||||
|
||||
include::fields/id-field.asciidoc[]
|
||||
|
||||
include::fields/index-field.asciidoc[]
|
||||
|
||||
include::fields/meta-field.asciidoc[]
|
||||
|
||||
include::fields/parent-field.asciidoc[]
|
||||
|
||||
include::fields/routing-field.asciidoc[]
|
||||
|
||||
include::fields/size-field.asciidoc[]
|
||||
|
||||
include::fields/source-field.asciidoc[]
|
||||
|
||||
include::fields/timestamp-field.asciidoc[]
|
||||
|
||||
include::fields/ttl-field.asciidoc[]
|
||||
|
||||
include::fields/type-field.asciidoc[]
|
||||
|
||||
include::fields/uid-field.asciidoc[]
|
||||
|
||||
|
|
|
@ -151,82 +151,18 @@ PUT my_index
|
|||
<1> The `_all` field is disabled for the `my_type` type.
|
||||
<2> The `query_string` query will default to querying the `content` field in this index.
|
||||
|
||||
[[include-in-all]]
|
||||
==== Including specific fields in `_all`
|
||||
[[excluding-from-all]]
|
||||
==== Excluding fields from `_all`
|
||||
|
||||
Individual fields can be included or excluded from the `_all` field with the
|
||||
`include_in_all` setting, which defaults to `true`:
|
||||
<<include-in-all,`include_in_all`>> setting.
|
||||
|
||||
[source,js]
|
||||
--------------------------------
|
||||
PUT my_index
|
||||
{
|
||||
"mappings": {
|
||||
"my_type": {
|
||||
"properties": {
|
||||
"title": { <1>
|
||||
"type": "string"
|
||||
},
|
||||
"content": { <1>
|
||||
"type": "string"
|
||||
},
|
||||
"date": { <2>
|
||||
"type": "date",
|
||||
"include_in_all": false
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
--------------------------------
|
||||
// AUTOSENSE
|
||||
|
||||
<1> The `title` and `content` fields will be included in the `_all` field.
|
||||
<2> The `date` field will not be included in the `_all` field.
|
||||
|
||||
The `include_in_all` parameter can also be set at the type level and on
|
||||
<<mapping-object-type,`object`>> or <<mapping-nested-type,`nested`>> fields,
|
||||
in which case all sub-fields inherit that setting. For instance:
|
||||
|
||||
[source,js]
|
||||
--------------------------------
|
||||
PUT my_index
|
||||
{
|
||||
"mappings": {
|
||||
"my_type": {
|
||||
"include_in_all": false, <1>
|
||||
"properties": {
|
||||
"title": { "type": "string" },
|
||||
"author": {
|
||||
"include_in_all": true, <2>
|
||||
"properties": {
|
||||
"first_name": { "type": "string" },
|
||||
"last_name": { "type": "string" }
|
||||
}
|
||||
},
|
||||
"editor": {
|
||||
"properties": {
|
||||
"first_name": { "type": "string" }, <3>
|
||||
"last_name": { "type": "string", "include_in_all": true } <3>
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
--------------------------------
|
||||
// AUTOSENSE
|
||||
|
||||
<1> All fields in `my_type` are excluded from `_all`.
|
||||
<2> The `author.first_name` and `author.last_name` fields are included in `_all`.
|
||||
<3> Only the `editor.last_name` field is included in `_all`.
|
||||
The `editor.first_name` inherits the type-level setting and is excluded.
|
||||
|
||||
[[all-field-and-boosting]]
|
||||
==== Index boosting and the `_all` field
|
||||
|
||||
Individual fields can be _boosted_ at index time, with the `boost` parameter.
|
||||
The `_all` field takes these boosts into account:
|
||||
Individual fields can be _boosted_ at index time, with the <<index-boost,`boost`>>
|
||||
parameter. The `_all` field takes these boosts into account:
|
||||
|
||||
[source,js]
|
||||
--------------------------------
|
||||
|
|
|
@ -2,8 +2,8 @@
|
|||
=== `_id` field
|
||||
|
||||
Each document indexed is associated with a <<mapping-type-field,`_type`>> (see
|
||||
<<all-mapping-types,Mapping Types>>) and an <<mapping-id-field,`_id`>>. The
|
||||
`_id` field is not indexed as its value can be derived automatically from the
|
||||
<<mapping-type>>) and an <<mapping-id-field,`_id`>>. The `_id` field is not
|
||||
indexed as its value can be derived automatically from the
|
||||
<<mapping-uid-field,`_uid`>> field.
|
||||
|
||||
The value of the `_id` field is accessible in queries and scripts, but _not_
|
||||
|
|
|
@ -0,0 +1,30 @@
|
|||
[[mapping-meta-field]]
|
||||
=== `_meta` field
|
||||
|
||||
Each mapping type can have custom meta data associated with it. These are not
|
||||
used at all by Elasticsearch, but can be used to store application-specific
|
||||
metadata, such as the class that a document belongs to:
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
PUT my_index
|
||||
{
|
||||
"mappings": {
|
||||
"user": {
|
||||
"_meta": { <1>
|
||||
"class": "MyApp::User",
|
||||
"version": {
|
||||
"min": "1.0",
|
||||
"max": "1.3"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
--------------------------------------------------
|
||||
// AUTOSENSE
|
||||
<1> This `_meta` info can be retrieved with the
|
||||
<<indices-get-mapping,GET mapping>> API.
|
||||
|
||||
The `_meta` field can be updated on an existing type using the
|
||||
<<indices-put-mapping,PUT mapping>> API.
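
As a hedged sketch, updating the `_meta` field for the `user` type above might
look like this (the new `version` values are made up for illustration):

[source,js]
--------------------------------------------------
PUT my_index/_mapping/user
{
  "_meta": {
    "class": "MyApp::User",
    "version": {
      "min": "1.1",
      "max": "1.4"
    }
  }
}
--------------------------------------------------
// AUTOSENSE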
|
|
@ -78,8 +78,7 @@ stored.
|
|||
WARNING: Removing fields from the `_source` has similar downsides to disabling
|
||||
`_source`, especially the fact that you cannot reindex documents from one
|
||||
Elasticsearch index to another. Consider using
|
||||
<<search-request-source-filtering,source filtering>> or a
|
||||
<<mapping-transform,transform script>> instead.
|
||||
<<search-request-source-filtering,source filtering>> instead.
|
||||
|
||||
The `includes`/`excludes` parameters (which also accept wildcards) can be used
|
||||
as follows:
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
[[mapping-ttl-field]]
|
||||
=== `_ttl`
|
||||
=== `_ttl` field
|
||||
|
||||
Some types of documents, such as session data or special offers, come with an
|
||||
expiration date. The `_ttl` field allows you to specify the minimum time a
|
||||
|
|
|
@ -2,8 +2,8 @@
|
|||
=== `_type` field
|
||||
|
||||
Each document indexed is associated with a <<mapping-type-field,`_type`>> (see
|
||||
<<all-mapping-types,Mapping Types>>) and an <<mapping-id-field,`_id`>>. The
|
||||
`_type` field is indexed in order to make searching by type name fast.
|
||||
<<mapping-type>>) and an <<mapping-id-field,`_id`>>. The `_type` field is
|
||||
indexed in order to make searching by type name fast.
|
||||
|
||||
The value of the `_type` field is accessible in queries, aggregations,
|
||||
scripts, and when sorting:
|
||||
|
|
|
@ -2,8 +2,8 @@
|
|||
=== `_uid` field
|
||||
|
||||
Each document indexed is associated with a <<mapping-type-field,`_type`>> (see
|
||||
<<all-mapping-types,Mapping Types>>) and an <<mapping-id-field,`_id`>>. These
|
||||
values are combined as `{type}#{id}` and indexed as the `_uid` field.
|
||||
<<mapping-type>>) and an <<mapping-id-field,`_id`>>. These values are
|
||||
combined as `{type}#{id}` and indexed as the `_uid` field.
|
||||
|
||||
The value of the `_uid` field is accessible in queries, aggregations, scripts,
|
||||
and when sorting:
|
||||
|
|
|
@ -1,25 +0,0 @@
|
|||
[[mapping-meta]]
|
||||
== Meta
|
||||
|
||||
Each mapping can have custom meta data associated with it. These are
|
||||
simple storage elements that are simply persisted along with the mapping
|
||||
and can be retrieved when fetching the mapping definition. The meta is
|
||||
defined under the `_meta` element, for example:
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
{
|
||||
"tweet" : {
|
||||
"_meta" : {
|
||||
"attr1" : "value1",
|
||||
"attr2" : {
|
||||
"attr3" : "value3"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
--------------------------------------------------
|
||||
|
||||
Meta can be handy for example for client libraries that perform
|
||||
serialization and deserialization to store its meta model (for example,
|
||||
the class the document maps to).
|
|
@ -0,0 +1,100 @@
|
|||
[[mapping-params]]
|
||||
== Mapping parameters
|
||||
|
||||
The following pages provide detailed explanations of the various mapping
|
||||
parameters that are used by <<mapping-types,field mappings>>:
|
||||
|
||||
|
||||
The following mapping parameters are common to some or all field datatypes:
|
||||
|
||||
* <<analyzer,`analyzer`>>
|
||||
* <<index-boost,`boost`>>
|
||||
* <<coerce,`coerce`>>
|
||||
* <<copy-to,`copy_to`>>
|
||||
* <<doc-values,`doc_values`>>
|
||||
* <<dynamic,`dynamic`>>
|
||||
* <<enabled,`enabled`>>
|
||||
* <<fielddata,`fielddata`>>
|
||||
* <<geohash,`geohash`>>
|
||||
* <<geohash-precision,`geohash_precision`>>
|
||||
* <<geohash-prefix,`geohash_prefix`>>
|
||||
* <<mapping-date-format,`format`>>
|
||||
* <<ignore-above,`ignore_above`>>
|
||||
* <<ignore-malformed,`ignore_malformed`>>
|
||||
* <<include-in-all,`include_in_all`>>
|
||||
* <<index-options,`index_options`>>
|
||||
* <<lat-lon,`lat_lon`>>
|
||||
* <<mapping-index,`index`>>
|
||||
* <<multi-fields,`fields`>>
|
||||
* <<norms,`norms`>>
|
||||
* <<null-value,`null_value`>>
|
||||
* <<position-offset-gap,`position_offset_gap`>>
|
||||
* <<properties,`properties`>>
|
||||
* <<search-analyzer,`search_analyzer`>>
|
||||
* <<similarity,`similarity`>>
|
||||
* <<mapping-store,`store`>>
|
||||
* <<term-vector,`term_vector`>>
|
||||
|
||||
|
||||
include::params/analyzer.asciidoc[]
|
||||
|
||||
include::params/boost.asciidoc[]
|
||||
|
||||
include::params/coerce.asciidoc[]
|
||||
|
||||
include::params/copy-to.asciidoc[]
|
||||
|
||||
include::params/doc-values.asciidoc[]
|
||||
|
||||
include::params/dynamic.asciidoc[]
|
||||
|
||||
include::params/enabled.asciidoc[]
|
||||
|
||||
include::params/fielddata.asciidoc[]
|
||||
|
||||
include::params/format.asciidoc[]
|
||||
|
||||
include::params/geohash.asciidoc[]
|
||||
|
||||
include::params/geohash-precision.asciidoc[]
|
||||
|
||||
include::params/geohash-prefix.asciidoc[]
|
||||
|
||||
include::params/ignore-above.asciidoc[]
|
||||
|
||||
include::params/ignore-malformed.asciidoc[]
|
||||
|
||||
include::params/include-in-all.asciidoc[]
|
||||
|
||||
include::params/index.asciidoc[]
|
||||
|
||||
include::params/index-options.asciidoc[]
|
||||
|
||||
include::params/lat-lon.asciidoc[]
|
||||
|
||||
include::params/multi-fields.asciidoc[]
|
||||
|
||||
include::params/norms.asciidoc[]
|
||||
|
||||
include::params/null-value.asciidoc[]
|
||||
|
||||
include::params/position-offset-gap.asciidoc[]
|
||||
|
||||
include::params/precision-step.asciidoc[]
|
||||
|
||||
include::params/properties.asciidoc[]
|
||||
|
||||
include::params/search-analyzer.asciidoc[]
|
||||
|
||||
include::params/similarity.asciidoc[]
|
||||
|
||||
include::params/store.asciidoc[]
|
||||
|
||||
include::params/term-vector.asciidoc[]
|
||||
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,80 @@
|
|||
[[analyzer]]
|
||||
=== `analyzer`
|
||||
|
||||
The values of <<mapping-index,`analyzed`>> string fields are passed through an
|
||||
<<analysis,analyzer>> to convert the string into a stream of _tokens_ or
|
||||
_terms_. For instance, the string `"The quick Brown Foxes."` may, depending
|
||||
on which analyzer is used, be analyzed to the tokens: `quick`, `brown`,
|
||||
`fox`. These are the actual terms that are indexed for the field, which makes
|
||||
it possible to search efficiently for individual words _within_ big blobs of
|
||||
text.
|
||||
|
||||
This analysis process needs to happen not just at index time, but also at
|
||||
query time: the query string needs to be passed through the same (or a
|
||||
similar) analyzer so that the terms that it tries to find are in the same
|
||||
format as those that exist in the index.
|
||||
|
||||
Elasticsearch ships with a number of <<analysis-analyzers,pre-defined analyzers>>,
|
||||
which can be used without further configuration. It also ships with many
|
||||
<<analysis-charfilters,character filters>>, <<analysis-tokenizers,tokenizers>>,
|
||||
and <<analysis-tokenfilters,token filters>> which can be combined to configure
|
||||
custom analyzers per index.
|
||||
|
||||
Analyzers can be specified per-query, per-field or per-index. At index time,
|
||||
Elasticsearch will look for an analyzer in this order:
|
||||
|
||||
* The `analyzer` defined in the field mapping.
|
||||
* An analyzer named `default` in the index settings.
|
||||
* The <<analysis-standard-analyzer,`standard`>> analyzer.
|
||||
|
||||
At query time, there are a few more layers:
|
||||
|
||||
* The `analyzer` defined in a <<full-text-queries,full-text query>>.
|
||||
* The `search_analyzer` defined in the field mapping.
|
||||
* The `analyzer` defined in the field mapping.
|
||||
* An analyzer named `default_search` in the index settings.
|
||||
* An analyzer named `default` in the index settings.
|
||||
* The <<analysis-standard-analyzer,`standard`>> analyzer.
|
||||
|
||||
The easiest way to specify an analyzer for a particular field is to define it
|
||||
in the field mapping, as follows:
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
PUT my_index
|
||||
{
|
||||
"mappings": {
|
||||
"my_type": {
|
||||
"properties": {
|
||||
"text": { <1>
|
||||
"type": "string",
|
||||
"fields": {
|
||||
"english": { <2>
|
||||
"type": "string",
|
||||
"analyzer": "english"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
GET my_index/_analyze?field=text <3>
|
||||
{
|
||||
"text": "The quick Brown Foxes."
|
||||
}
|
||||
|
||||
GET my_index/_analyze?field=text.english <4>
|
||||
{
|
||||
"text": "The quick Brown Foxes."
|
||||
}
|
||||
--------------------------------------------------
|
||||
// AUTOSENSE
|
||||
<1> The `text` field uses the default `standard` analyzer`.
|
||||
<2> The `text.english` <<multi-fields,multi-field>> uses the `english` analyzer, which removes stop words and applies stemming.
|
||||
<3> This returns the tokens: [ `the`, `quick`, `brown`, `foxes` ].
|
||||
<4> This returns the tokens: [ `quick`, `brown`, `fox` ].
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,59 @@
|
|||
[[index-boost]]
|
||||
=== `boost`
|
||||
|
||||
Individual fields can be _boosted_ -- count more towards the relevance score
|
||||
-- at index time, with the `boost` parameter as follows:
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
PUT my_index
|
||||
{
|
||||
"mappings": {
|
||||
"my_type": {
|
||||
"properties": {
|
||||
"title": {
|
||||
"type": "string",
|
||||
"boost": 2 <1>
|
||||
},
|
||||
"content": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
--------------------------------------------------
|
||||
// AUTOSENSE
|
||||
|
||||
<1> Matches on the `title` field will have twice the weight of those on the
|
||||
`content` field, which has the default `boost` of `1.0`.
|
||||
|
||||
Note that a `title` field will usually be shorter than a `content` field. The
|
||||
default relevance calculation takes field length into account, so a short
|
||||
`title` field will have a higher natural boost than a long `content` field.
|
||||
|
||||
[WARNING]
|
||||
.Why index time boosting is a bad idea
|
||||
==================================================
|
||||
|
||||
We advise against using index time boosting for the following reasons:
|
||||
|
||||
* You cannot change index-time `boost` values without reindexing all of your
|
||||
documents.
|
||||
|
||||
* Every query supports query-time boosting which achieves the same effect. The
|
||||
difference is that you can tweak the `boost` value without having to reindex.
|
||||
|
||||
* Index-time boosts are stored as part of the <<norms,`norm`>>, which is only one
|
||||
byte. This reduces the resolution of the field length normalization factor
|
||||
which can lead to lower quality relevance calculations.
|
||||
|
||||
==================================================
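
As a hedged illustration of the query-time alternative, the same preference
for `title` matches could be expressed per request instead (a sketch only,
reusing the `title` and `content` fields from the example above):

[source,js]
--------------------------------------------------
GET my_index/_search
{
  "query": {
    "bool": {
      "should": [
        { "match": { "title":   { "query": "quick brown fox", "boost": 2 }}}, <1>
        { "match": { "content": { "query": "quick brown fox" }}}
      ]
    }
  }
}
--------------------------------------------------
// AUTOSENSE
<1> The query-time `boost` can be changed on every request, without reindexing.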
|
||||
|
||||
The only advantage that index time boosting has is that it is copied with the
|
||||
value into the <<mapping-all-field,`_all`>> field. This means that, when
|
||||
querying the `_all` field, words that originated from the `title` field will
|
||||
have a higher score than words that originated in the `content` field.
|
||||
This functionality comes at a cost: queries on the `_all` field are slower
|
||||
when index-time boosting is used.
|
||||
|
|
@ -0,0 +1,89 @@
|
|||
[[coerce]]
|
||||
=== `coerce`
|
||||
|
||||
Data is not always clean. Depending on how it is produced, a number might be
|
||||
rendered in the JSON body as a true JSON number, e.g. `5`, but it might also
|
||||
be rendered as a string, e.g. `"5"`. Alternatively, a number that should be
|
||||
an integer might instead be rendered as a floating point, e.g. `5.0`, or even
|
||||
`"5.0"`.
|
||||
|
||||
Coercion attempts to clean up dirty values to fit the datatype of a field.
|
||||
For instance:
|
||||
|
||||
* Strings will be coerced to numbers.
|
||||
* Floating points will be truncated for integer values.
|
||||
* Lon/lat geo-points will be normalized to a standard -180:180 / -90:90 coordinate system.
|
||||
|
||||
For instance:
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
PUT my_index
|
||||
{
|
||||
"mappings": {
|
||||
"my_type": {
|
||||
"properties": {
|
||||
"number_one": {
|
||||
"type": "integer"
|
||||
},
|
||||
"number_two": {
|
||||
"type": "integer",
|
||||
"coerce": false
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
PUT my_index/my_type/1
|
||||
{
|
||||
"number_one": "10" <1>
|
||||
}
|
||||
|
||||
PUT my_index/my_type/2
|
||||
{
|
||||
"number_two": "10" <2>
|
||||
}
|
||||
--------------------------------------------------
|
||||
// AUTOSENSE
|
||||
<1> The `number_one` field will contain the integer `10`.
|
||||
<2> This document will be rejected because coercion is disabled.
|
||||
|
||||
[[coerce-setting]]
|
||||
==== Index-level default
|
||||
|
||||
The `index.mapping.coerce` setting can be set on the index level to disable
|
||||
coercion globally across all mapping types:
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
PUT my_index
|
||||
{
|
||||
"settings": {
|
||||
"index.mapping.coerce": false
|
||||
},
|
||||
"mappings": {
|
||||
"my_type": {
|
||||
"properties": {
|
||||
"number_one": {
|
||||
"type": "integer"
|
||||
},
|
||||
"number_two": {
|
||||
"type": "integer",
|
||||
"coerce": true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
PUT my_index/my_type/1
|
||||
{ "number_one": "10" } <1>
|
||||
|
||||
PUT my_index/my_type/2
|
||||
{ "number_two": "10" } <2>
|
||||
--------------------------------------------------
|
||||
// AUTOSENSE
|
||||
<1> This document will be rejected because the `number_one` field inherits the index-level coercion setting.
|
||||
<2> The `number_two` field overrides the index level setting to enable coercion.
|
||||
|
|
@ -0,0 +1,64 @@
|
|||
[[copy-to]]
|
||||
=== `copy_to`
|
||||
|
||||
The `copy_to` parameter allows you to create custom
|
||||
<<mapping-all-field,`_all`>> fields. In other words, the values of multiple
|
||||
fields can be copied into a group field, which can then be queried as a single
|
||||
field. For instance, the `first_name` and `last_name` fields can be copied to
|
||||
the `full_name` field as follows:
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
PUT /my_index
|
||||
{
|
||||
"mappings": {
|
||||
"my_type": {
|
||||
"properties": {
|
||||
"first_name": {
|
||||
"type": "string",
|
||||
"copy_to": "full_name" <1>
|
||||
},
|
||||
"last_name": {
|
||||
"type": "string",
|
||||
"copy_to": "full_name" <1>
|
||||
},
|
||||
"full_name": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
PUT /my_index/my_type/1
|
||||
{
|
||||
"first_name": "John",
|
||||
"last_name": "Smith"
|
||||
}
|
||||
|
||||
GET /my_index/_search
|
||||
{
|
||||
"query": {
|
||||
"match": {
|
||||
"full_name": { <2>
|
||||
"query": "John Smith",
|
||||
"operator": "and"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
--------------------------------------------------
|
||||
// AUTOSENSE
|
||||
<1> The values of the `first_name` and `last_name` fields are copied to the
|
||||
`full_name` field.
|
||||
|
||||
<2> The `first_name` and `last_name` fields can still be queried for the
|
||||
first name and last name respectively, but the `full_name` field can be
|
||||
queried for both first and last names.
|
||||
|
||||
Some important points:
|
||||
|
||||
* It is the field _value_ which is copied, not the terms (which result from the analysis process).
|
||||
* The original <<mapping-source-field,`_source`>> field will not be modified to show the copied values.
|
||||
* The same value can be copied to multiple fields, with `"copy_to": [ "field_1", "field_2" ]`
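
For instance, a minimal sketch of the last point, copying a hypothetical
`nickname` field into both the `full_name` field above and a new `all_names`
group field:

[source,js]
--------------------------------------------------
PUT my_index/_mapping/my_type
{
  "properties": {
    "nickname": {
      "type": "string",
      "copy_to": [ "full_name", "all_names" ] <1>
    },
    "all_names": {
      "type": "string"
    }
  }
}
--------------------------------------------------
// AUTOSENSE
<1> The value of `nickname` is copied to both the `full_name` and `all_names` fields.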
|
|
@ -0,0 +1,46 @@
|
|||
[[doc-values]]
|
||||
=== `doc_values`
|
||||
|
||||
Most fields are <<mapping-index,indexed>> by default, which makes them
|
||||
searchable. The inverted index allows queries to look up the search term in a
unique sorted list of terms, and from that immediately have access to the list
of documents that contain the term.

Sorting, aggregations, and access to field values in scripts require a
different data access pattern. Instead of looking up the term and finding
documents, we need to be able to look up the document and find the terms that
it has in a field.
|
||||
|
||||
Doc values are the on-disk data structure, built at document index time, which
|
||||
makes this data access pattern possible. Doc values are supported on almost
|
||||
all field types, with the __notable exception of `analyzed` string fields__.
|
||||
|
||||
All fields which support doc values have them enabled by default. If you are
|
||||
sure that you don't need to sort or aggregate on a field, or access the field
|
||||
value from a script, you can disable doc values in order to save disk space:
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
PUT my_index
|
||||
{
|
||||
"mappings": {
|
||||
"my_type": {
|
||||
"properties": {
|
||||
"status_code": { <1>
|
||||
"type": "string",
|
||||
"index": "not_analyzed"
|
||||
},
|
||||
"session_id": { <2>
|
||||
"type": "string",
|
||||
"index": "not_analyzed",
|
||||
"doc_values": false
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
--------------------------------------------------
|
||||
// AUTOSENSE
|
||||
<1> The `status_code` field has `doc_values` enabled by default.
|
||||
<2> The `session_id` has `doc_values` disabled, but can still be queried.
|
||||
|
|
@ -0,0 +1,87 @@
|
|||
[[dynamic]]
|
||||
=== `dynamic`
|
||||
|
||||
By default, fields can be added _dynamically_ to a document, or to
|
||||
<<object,inner objects>> within a document, just by indexing a document
|
||||
containing the new field. For instance:
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
DELETE my_index <1>
|
||||
|
||||
PUT my_index/my_type/1 <2>
|
||||
{
|
||||
"username": "johnsmith",
|
||||
"name": {
|
||||
"first": "John",
|
||||
"last": "Smith"
|
||||
}
|
||||
}
|
||||
|
||||
GET my_index/_mapping <3>
|
||||
|
||||
PUT my_index/my_type/2 <4>
|
||||
{
|
||||
"username": "marywhite",
|
||||
"email": "mary@white.com",
|
||||
"name": {
|
||||
"first": "Mary",
|
||||
"middle": "Alice",
|
||||
"last": "White"
|
||||
}
|
||||
}
|
||||
|
||||
GET my_index/_mapping <5>
|
||||
--------------------------------------------------
|
||||
// AUTOSENSE
|
||||
<1> First delete the index, in case it already exists.
|
||||
<2> This document introduces the string field `username`, the object field
|
||||
`name`, and two string fields under the `name` object which can be
|
||||
referred to as `name.first` and `name.last`.
|
||||
<3> Check the mapping to verify the above.
|
||||
<4> This document adds two string fields: `email` and `name.middle`.
|
||||
<5> Check the mapping to verify the changes.
|
||||
|
||||
The details of how new fields are detected and added to the mapping are explained in <<dynamic-mapping>>.
|
||||
|
||||
The `dynamic` setting controls whether new fields can be added dynamically or
|
||||
not. It accepts three settings:
|
||||
|
||||
[horizontal]
|
||||
`true`:: Newly detected fields are added to the mapping. (default)
|
||||
`false`:: Newly detected fields are ignored. New fields must be added explicitly.
|
||||
`strict`:: If new fields are detected, an exception is thrown and the document is rejected.
|
||||
|
||||
The `dynamic` setting may be set at the mapping type level, and on each
|
||||
<<object,inner object>>. Inner objects inherit the setting from their parent
|
||||
object or from the mapping type. For instance:
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
PUT my_index
|
||||
{
|
||||
"mappings": {
|
||||
"my_type": {
|
||||
"dynamic": false, <1>
|
||||
"properties": {
|
||||
"user": { <2>
|
||||
"properties": {
|
||||
"name": {
|
||||
"type": "string"
|
||||
},
|
||||
"social_networks": { <3>
|
||||
"dynamic": true,
|
||||
"properties": {}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
--------------------------------------------------
|
||||
// AUTOSENSE
|
||||
<1> Dynamic mapping is disabled at the type level, so no new top-level fields will be added dynamically.
|
||||
<2> The `user` object inherits the type-level setting.
|
||||
<3> The `user.social_networks` object enables dynamic mapping, so new fields may be added to this inner object.
|
||||
|
|
@ -0,0 +1,94 @@
|
|||
[[enabled]]
|
||||
=== `enabled`
|
||||
|
||||
Elasticsearch tries to index all of the fields you give it, but sometimes you
|
||||
want to just store the field without indexing it. For instance, imagine that
|
||||
you are using Elasticsearch as a web session store. You may want to index the
|
||||
session ID and last update time, but you don't need to query or run
|
||||
aggregations on the session data itself.
|
||||
|
||||
The `enabled` setting, which can be applied only to the mapping type and to
|
||||
<<object,`object`>> fields, causes Elasticsearch to skip parsing of the
|
||||
contents of the field entirely. The JSON can still be retrieved from the
|
||||
<<mapping-source-field,`_source`>> field, but it is not searchable or stored
|
||||
in any other way:
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
PUT my_index
|
||||
{
|
||||
"mappings": {
|
||||
"session": {
|
||||
"properties": {
|
||||
"user_id": {
|
||||
"type": "string",
|
||||
"index": "not_analyzed"
|
||||
},
|
||||
"last_updated": {
|
||||
"type": "date"
|
||||
},
|
||||
"session_data": { <1>
|
||||
"enabled": false
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
PUT my_index/session/session_1
|
||||
{
|
||||
"user_id": "kimchy",
|
||||
"session_data": { <2>
|
||||
"arbitrary_object": {
|
||||
"some_array": [ "foo", "bar", { "baz": 2 } ]
|
||||
}
|
||||
},
|
||||
"last_updated": "2015-12-06T18:20:22"
|
||||
}
|
||||
|
||||
PUT my_index/session/session_2
|
||||
{
|
||||
"user_id": "jpountz",
|
||||
"session_data": "none", <3>
|
||||
"last_updated": "2015-12-06T18:22:13"
|
||||
}
|
||||
--------------------------------------------------
|
||||
// AUTOSENSE
|
||||
<1> The `session_data` field is disabled.
|
||||
<2> Any arbitrary data can be passed to the `session_data` field as it will be entirely ignored.
|
||||
<3> The `session_data` will also ignore values that are not JSON objects.
|
||||
|
||||
The entire mapping type may be disabled as well, in which case the document is
|
||||
stored in the <<mapping-source-field,`_source`>> field, which means it can be
|
||||
retrieved, but none of its contents are indexed in any way:
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
PUT my_index
|
||||
{
|
||||
"mappings": {
|
||||
"session": { <1>
|
||||
"enabled": false
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
PUT my_index/session/session_1
|
||||
{
|
||||
"user_id": "kimchy",
|
||||
"session_data": {
|
||||
"arbitrary_object": {
|
||||
"some_array": [ "foo", "bar", { "baz": 2 } ]
|
||||
}
|
||||
},
|
||||
"last_updated": "2015-12-06T18:20:22"
|
||||
}
|
||||
|
||||
GET my_index/session/session_1 <2>
|
||||
|
||||
GET my_index/_mapping <3>
|
||||
--------------------------------------------------
|
||||
// AUTOSENSE
|
||||
<1> The entire `session` mapping type is disabled.
|
||||
<2> The document can be retrieved.
|
||||
<3> Checking the mapping reveals that no fields have been added.
|
|
@ -0,0 +1,225 @@
|
|||
[[fielddata]]
|
||||
=== `fielddata`
|
||||
|
||||
Most fields are <<mapping-index,indexed>> by default, which makes them
|
||||
searchable. The inverted index allows queries to look up the search term in a
unique sorted list of terms, and from that immediately have access to the list
of documents that contain the term.

Sorting, aggregations, and access to field values in scripts require a
different data access pattern. Instead of looking up the term and finding
documents, we need to be able to look up the document and find the terms that
it has in a field.
|
||||
|
||||
Most fields can use index-time, on-disk <<doc-values,`doc_values`>> to support
|
||||
this type of data access pattern, but `analyzed` string fields do not support
|
||||
`doc_values`.
|
||||
|
||||
Instead, `analyzed` strings use a query-time data structure called
|
||||
`fielddata`. This data structure is built on demand the first time that a
|
||||
field is used for aggregations, sorting, or is accessed in a script. It is built
|
||||
by reading the entire inverted index for each segment from disk, inverting the
|
||||
term ↔︎ document relationship, and storing the result in memory, in the
|
||||
JVM heap.
|
||||
|
||||
|
||||
Loading fielddata is an expensive process so, once it has been loaded, it
|
||||
remains in memory for the lifetime of the segment.
|
||||
|
||||
[WARNING]
|
||||
.Fielddata can fill up your heap space
|
||||
==============================================================================
|
||||
Fielddata can consume a lot of heap space, especially when loading high
|
||||
cardinality `analyzed` string fields. Most of the time, it doesn't make sense
|
||||
to sort or aggregate on `analyzed` string fields (with the notable exception
|
||||
of the
|
||||
<<search-aggregations-bucket-significantterms-aggregation,`significant_terms`>>
|
||||
aggregation). Always think about whether a `not_analyzed` field (which can
|
||||
use `doc_values`) would be a better fit for your use case.
|
||||
==============================================================================
|
||||
|
||||
[[fielddata-format]]
|
||||
==== `fielddata.format`
|
||||
|
||||
For `analyzed` string fields, the fielddata `format` controls whether
|
||||
fielddata should be enabled or not. It accepts: `disabled` and `paged_bytes`
|
||||
(enabled, which is the default). To disable fielddata loading, you can use
|
||||
the following mapping:
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
PUT my_index
|
||||
{
|
||||
"mappings": {
|
||||
"my_type": {
|
||||
"properties": {
|
||||
"text": {
|
||||
"type": "string",
|
||||
"fielddata": {
|
||||
"format": "disabled" <1>
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
--------------------------------------------------
|
||||
// AUTOSENSE
|
||||
<1> The `text` field cannot be used for sorting, aggregations, or in scripts.
|
||||
|
||||
.Fielddata and other datatypes
|
||||
[NOTE]
|
||||
==================================================
|
||||
|
||||
Historically, other field datatypes also used fielddata, but this has been replaced
|
||||
by index-time, disk-based <<doc-values,`doc_values`>>.
|
||||
|
||||
==================================================
|
||||
|
||||
|
||||
[[fielddata-loading]]
|
||||
==== `fielddata.loading`
|
||||
|
||||
This per-field setting controls when fielddata is loaded into memory. It
|
||||
accepts three options:
|
||||
|
||||
[horizontal]
|
||||
`lazy`::
|
||||
|
||||
Fielddata is only loaded into memory when it is needed. (default)
|
||||
|
||||
`eager`::
|
||||
|
||||
Fielddata is loaded into memory before a new search segment becomes
|
||||
visible to search. This can reduce the latency that a user may experience
|
||||
if their search request has to trigger lazy loading from a big segment.
|
||||
|
||||
`eager_global_ordinals`::
|
||||
|
||||
Loading fielddata into memory is only part of the work that is required.
|
||||
After loading the fielddata for each segment, Elasticsearch builds the
|
||||
<<global-ordinals>> data structure to make a list of all unique terms
|
||||
across all the segments in a shard. By default, global ordinals are built
|
||||
lazily. If the field has a very high cardinality, global ordinals may
|
||||
take some time to build, in which case you can use eager loading instead.
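
For instance, a hedged sketch of the last option, eagerly loading global
ordinals for a hypothetical `category` field:

[source,js]
--------------------------------------------------
PUT my_index
{
  "mappings": {
    "my_type": {
      "properties": {
        "category": {
          "type": "string",
          "fielddata": {
            "loading": "eager_global_ordinals" <1>
          }
        }
      }
    }
  }
}
--------------------------------------------------
// AUTOSENSE
<1> Both fielddata and global ordinals for the `category` field are loaded
eagerly, before a new segment becomes visible to search.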
|
||||
|
||||
[[global-ordinals]]
|
||||
.Global ordinals
|
||||
*****************************************
|
||||
|
||||
Global ordinals is a data structure on top of fielddata and doc values that
maintains an incremental numbering for each unique term in lexicographic
order: each term has a unique number, and the number of term 'A' is lower than
the number of term 'B' if 'A' sorts before 'B'. Global ordinals are only
supported on string fields.
|
||||
|
||||
Fielddata and doc values also have ordinals, which is a unique numbering for all terms
|
||||
in a particular segment and field. Global ordinals just build on top of this,
|
||||
by providing a mapping between the segment ordinals and the global ordinals,
|
||||
the latter being unique across the entire shard.
|
||||
|
||||
Global ordinals are used for features that use segment ordinals, such as
|
||||
sorting and the terms aggregation, to improve the execution time. A terms
|
||||
aggregation relies purely on global ordinals to perform the aggregation at the
|
||||
shard level, then converts global ordinals to the real term only for the final
|
||||
reduce phase, which combines results from different shards.
|
||||
|
||||
Global ordinals for a specified field are tied to _all the segments of a
shard_, while fielddata and doc values ordinals are tied to a single segment.
For this reason global ordinals need to be rebuilt in their entirety whenever
a new segment becomes visible.
|
||||
|
||||
The loading time of global ordinals depends on the number of terms in a field,
but in general it is low, since the source field data has already been loaded.
The memory overhead of global ordinals is small because it is very efficiently
compressed. Eager loading of global ordinals can move the loading time from
the first search request to the refresh itself.
|
||||
|
||||
*****************************************
|
||||
|
||||
[[field-data-filtering]]
|
||||
==== `fielddata.filter`
|
||||
|
||||
Fielddata filtering can be used to reduce the number of terms loaded into
|
||||
memory, and thus reduce memory usage. Terms can be filtered by _frequency_ or
|
||||
by _regular expression_, or a combination of the two:
|
||||
|
||||
Filtering by frequency::
|
||||
+
|
||||
--
|
||||
|
||||
The frequency filter allows you to only load terms whose term frequency falls
|
||||
between a `min` and `max` value, which can be expressed as an absolute
number (when the number is bigger than 1.0) or as a percentage
(e.g. `0.01` is `1%` and `1.0` is `100%`). Frequency is calculated
|
||||
*per segment*. Percentages are based on the number of docs which have a
|
||||
value for the field, as opposed to all docs in the segment.
|
||||
|
||||
Small segments can be excluded completely by specifying the minimum
|
||||
number of docs that the segment should contain with `min_segment_size`:
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
PUT my_index
|
||||
{
|
||||
"mappings": {
|
||||
"my_type": {
|
||||
"properties": {
|
||||
"tag": {
|
||||
"type": "string",
|
||||
"fielddata": {
|
||||
"filter": {
|
||||
"frequency": {
|
||||
"min": 0.001,
|
||||
"max": 0.1,
|
||||
"min_segment_size": 500
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
--------------------------------------------------
|
||||
// AUTOSENSE
|
||||
--
|
||||
|
||||
Filtering by regex::
|
||||
+
|
||||
--
|
||||
Terms can also be filtered by regular expression - only values which
|
||||
match the regular expression are loaded. Note: the regular expression is
|
||||
applied to each term in the field, not to the whole field value. For
|
||||
instance, to only load hashtags from a tweet, we can use a regular
|
||||
expression which matches terms beginning with `#`:
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
PUT my_index
|
||||
{
|
||||
"mappings": {
|
||||
"my_type": {
|
||||
"properties": {
|
||||
"tweet": {
|
||||
"type": "string",
|
||||
"analyzer": "whitespace",
|
||||
"fielddata": {
|
||||
"filter": {
|
||||
"regex": {
|
||||
"pattern": "^#.*"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
--------------------------------------------------
|
||||
// AUTOSENSE
|
||||
--
|
||||
|
||||
These filters can be updated on an existing field mapping and will take
|
||||
effect the next time the fielddata for a segment is loaded. Use the
|
||||
<<indices-clearcache,Clear Cache>> API
|
||||
to reload the fielddata using the new filters.
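
As a hedged sketch (assuming the `my_index` examples above), reloading
fielddata after a filter change could look like this:

[source,js]
--------------------------------------------------
POST my_index/_cache/clear?fielddata=true <1>
--------------------------------------------------
// AUTOSENSE
<1> Clears only the fielddata cache for `my_index`; the new filters are
applied the next time fielddata is loaded.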

@@ -0,0 +1,281 @@
[[mapping-date-format]]
=== `format`

In JSON documents, dates are represented as strings. Elasticsearch uses a set
of preconfigured formats to recognize and parse these strings into a long
value representing _milliseconds-since-the-epoch_ in UTC.

Besides the <<built-in-date-formats,built-in formats>>, your own
<<custom-date-formats,custom formats>> can be specified using the familiar
`yyyy/MM/dd` syntax:

[source,js]
--------------------------------------------------
PUT my_index
{
  "mappings": {
    "my_type": {
      "properties": {
        "date": {
          "type": "date",
          "format": "yyyy-MM-dd"
        }
      }
    }
  }
}
--------------------------------------------------
// AUTOSENSE

Many APIs which support date values also support <<date-math,date math>>
expressions, such as `now-1M/d` -- the current time, minus one month, rounded
down to the nearest day.
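
For instance, a sketch of a `range` query that combines the `date` field above
with date math (see <<date-math,date math>> for the full syntax):

[source,js]
--------------------------------------------------
GET my_index/_search
{
  "query": {
    "range": {
      "date": {
        "gte": "now-1M/d", <1>
        "lte": "now/d"
      }
    }
  }
}
--------------------------------------------------
// AUTOSENSE
<1> Matches documents whose `date` falls within the last month, with both bounds rounded down to the start of a day.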

[[custom-date-formats]]
==== Custom date formats

Completely customizable date formats are supported. The syntax for these is explained
http://www.joda.org/joda-time/apidocs/org/joda/time/format/DateTimeFormat.html[in the Joda docs].
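
As an illustrative sketch (the field name and patterns are arbitrary), several
formats, custom or built in, can be combined by separating them with `||`;
each format is tried in turn until one matches:

[source,js]
--------------------------------------------------
PUT my_index/_mapping/my_type
{
  "properties": {
    "created": {
      "type": "date",
      "format": "dd/MM/yyyy HH:mm:ss||dd/MM/yyyy||epoch_millis" <1>
    }
  }
}
--------------------------------------------------
// AUTOSENSE
<1> A value is parsed with the first format that matches, so `"02/11/2015"`, `"02/11/2015 13:00:00"`, and `1446458400000` would all be accepted.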

[[built-in-date-formats]]
==== Built In Formats

Most of the formats below have a `strict` companion format, which means that
the year, month, and day parts must have leading zeros in order to be valid.
This means that a date like `5/11/1` would not be valid; you would need to
specify the full date, which would be `2005/11/01` in this example. So instead
of `date_optional_time` you would need to specify `strict_date_optional_time`.
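
A minimal sketch of opting into the stricter variant (the `strict_date` field
name is arbitrary):

[source,js]
--------------------------------------------------
PUT my_index/_mapping/my_type
{
  "properties": {
    "strict_date": {
      "type": "date",
      "format": "strict_date_optional_time" <1>
    }
  }
}
--------------------------------------------------
// AUTOSENSE
<1> `2015-11-01` and `2015-11-01T13:00:00` are accepted, while a lenient value like `2015-1-1` is rejected.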

The following table lists all the default ISO formats supported:

`epoch_millis`::

    A formatter for the number of milliseconds since the epoch. Note that
    this timestamp allows a max length of 13 chars, so dates before 1653
    and after 2286 are not supported. You should use a different date
    formatter in that case.

`epoch_second`::

    A formatter for the number of seconds since the epoch. Note that this
    timestamp allows a max length of 10 chars, so dates before 1653 and
    after 2286 are not supported. You should use a different date formatter
    in that case.

[[strict-date-time]]`date_optional_time` or `strict_date_optional_time`::

    A generic ISO datetime parser where the date is mandatory and the time is
    optional.
    http://www.joda.org/joda-time/apidocs/org/joda/time/format/ISODateTimeFormat.html#dateOptionalTimeParser--[Full details here].

`basic_date`::

    A basic formatter for a full date as four digit year, two digit month of
    year, and two digit day of month: `yyyyMMdd`.

`basic_date_time`::

    A basic formatter that combines a basic date and time, separated by a 'T':
    `yyyyMMdd'T'HHmmss.SSSZ`.

`basic_date_time_no_millis`::

    A basic formatter that combines a basic date and time without millis,
    separated by a 'T': `yyyyMMdd'T'HHmmssZ`.

`basic_ordinal_date`::

    A formatter for a full ordinal date, using a four digit year and three
    digit dayOfYear: `yyyyDDD`.

`basic_ordinal_date_time`::

    A formatter for a full ordinal date and time, using a four digit year and
    three digit dayOfYear: `yyyyDDD'T'HHmmss.SSSZ`.

`basic_ordinal_date_time_no_millis`::

    A formatter for a full ordinal date and time without millis, using a four
    digit year and three digit dayOfYear: `yyyyDDD'T'HHmmssZ`.

`basic_time`::

    A basic formatter for a two digit hour of day, two digit minute of hour,
    two digit second of minute, three digit millis, and time zone offset:
    `HHmmss.SSSZ`.

`basic_time_no_millis`::

    A basic formatter for a two digit hour of day, two digit minute of hour,
    two digit second of minute, and time zone offset: `HHmmssZ`.

`basic_t_time`::

    A basic formatter for a two digit hour of day, two digit minute of hour,
    two digit second of minute, three digit millis, and time zone offset
    prefixed by 'T': `'T'HHmmss.SSSZ`.

`basic_t_time_no_millis`::

    A basic formatter for a two digit hour of day, two digit minute of hour,
    two digit second of minute, and time zone offset prefixed by 'T':
    `'T'HHmmssZ`.

`basic_week_date` or `strict_basic_week_date`::

    A basic formatter for a full date as four digit weekyear, two digit week
    of weekyear, and one digit day of week: `xxxx'W'wwe`.

`basic_week_date_time` or `strict_basic_week_date_time`::

    A basic formatter that combines a basic weekyear date and time, separated
    by a 'T': `xxxx'W'wwe'T'HHmmss.SSSZ`.

`basic_week_date_time_no_millis` or `strict_basic_week_date_time_no_millis`::

    A basic formatter that combines a basic weekyear date and time without
    millis, separated by a 'T': `xxxx'W'wwe'T'HHmmssZ`.

`date` or `strict_date`::

    A formatter for a full date as four digit year, two digit month of year,
    and two digit day of month: `yyyy-MM-dd`.

`date_hour` or `strict_date_hour`::

    A formatter that combines a full date and two digit hour of day.

`date_hour_minute` or `strict_date_hour_minute`::

    A formatter that combines a full date, two digit hour of day, and two
    digit minute of hour.

`date_hour_minute_second` or `strict_date_hour_minute_second`::

    A formatter that combines a full date, two digit hour of day, two digit
    minute of hour, and two digit second of minute.

`date_hour_minute_second_fraction` or `strict_date_hour_minute_second_fraction`::

    A formatter that combines a full date, two digit hour of day, two digit
    minute of hour, two digit second of minute, and three digit fraction of
    second: `yyyy-MM-dd'T'HH:mm:ss.SSS`.

`date_hour_minute_second_millis` or `strict_date_hour_minute_second_millis`::

    A formatter that combines a full date, two digit hour of day, two digit
    minute of hour, two digit second of minute, and three digit fraction of
    second: `yyyy-MM-dd'T'HH:mm:ss.SSS`.

`date_time` or `strict_date_time`::

    A formatter that combines a full date and time, separated by a 'T':
    `yyyy-MM-dd'T'HH:mm:ss.SSSZZ`.

`date_time_no_millis` or `strict_date_time_no_millis`::

    A formatter that combines a full date and time without millis, separated
    by a 'T': `yyyy-MM-dd'T'HH:mm:ssZZ`.

`hour` or `strict_hour`::

    A formatter for a two digit hour of day.

`hour_minute` or `strict_hour_minute`::

    A formatter for a two digit hour of day and two digit minute of hour.

`hour_minute_second` or `strict_hour_minute_second`::

    A formatter for a two digit hour of day, two digit minute of hour, and two
    digit second of minute.

`hour_minute_second_fraction` or `strict_hour_minute_second_fraction`::

    A formatter for a two digit hour of day, two digit minute of hour, two
    digit second of minute, and three digit fraction of second: `HH:mm:ss.SSS`.

`hour_minute_second_millis` or `strict_hour_minute_second_millis`::

    A formatter for a two digit hour of day, two digit minute of hour, two
    digit second of minute, and three digit fraction of second: `HH:mm:ss.SSS`.

`ordinal_date` or `strict_ordinal_date`::

    A formatter for a full ordinal date, using a four digit year and three
    digit dayOfYear: `yyyy-DDD`.

`ordinal_date_time` or `strict_ordinal_date_time`::

    A formatter for a full ordinal date and time, using a four digit year and
    three digit dayOfYear: `yyyy-DDD'T'HH:mm:ss.SSSZZ`.

`ordinal_date_time_no_millis` or `strict_ordinal_date_time_no_millis`::

    A formatter for a full ordinal date and time without millis, using a four
    digit year and three digit dayOfYear: `yyyy-DDD'T'HH:mm:ssZZ`.

`time` or `strict_time`::

    A formatter for a two digit hour of day, two digit minute of hour, two
    digit second of minute, three digit fraction of second, and time zone
    offset: `HH:mm:ss.SSSZZ`.

`time_no_millis` or `strict_time_no_millis`::

    A formatter for a two digit hour of day, two digit minute of hour, two
    digit second of minute, and time zone offset: `HH:mm:ssZZ`.

`t_time` or `strict_t_time`::

    A formatter for a two digit hour of day, two digit minute of hour, two
    digit second of minute, three digit fraction of second, and time zone
    offset prefixed by 'T': `'T'HH:mm:ss.SSSZZ`.

`t_time_no_millis` or `strict_t_time_no_millis`::

    A formatter for a two digit hour of day, two digit minute of hour, two
    digit second of minute, and time zone offset prefixed by 'T': `'T'HH:mm:ssZZ`.

`week_date` or `strict_week_date`::

    A formatter for a full date as four digit weekyear, two digit week of
    weekyear, and one digit day of week: `xxxx-'W'ww-e`.

`week_date_time` or `strict_week_date_time`::

    A formatter that combines a full weekyear date and time, separated by a
    'T': `xxxx-'W'ww-e'T'HH:mm:ss.SSSZZ`.

`week_date_time_no_millis` or `strict_week_date_time_no_millis`::

    A formatter that combines a full weekyear date and time without millis,
    separated by a 'T': `xxxx-'W'ww-e'T'HH:mm:ssZZ`.

`weekyear` or `strict_weekyear`::

    A formatter for a four digit weekyear.

`weekyear_week` or `strict_weekyear_week`::

    A formatter for a four digit weekyear and two digit week of weekyear.

`weekyear_week_day` or `strict_weekyear_week_day`::

    A formatter for a four digit weekyear, two digit week of weekyear, and one
    digit day of week.

`year` or `strict_year`::

    A formatter for a four digit year.

`year_month` or `strict_year_month`::

    A formatter for a four digit year and two digit month of year.

`year_month_day` or `strict_year_month_day`::

    A formatter for a four digit year, two digit month of year, and two digit
    day of month.

@@ -0,0 +1,60 @@
[[geohash-precision]]
=== `geohash_precision`

Geohashes are a form of lat/lon encoding which divides the earth up into
a grid. Each cell in this grid is represented by a geohash string. Each
cell in turn can be further subdivided into smaller cells which are
represented by a longer string. So the longer the geohash, the smaller
(and thus more accurate) the cell is.

The `geohash_precision` setting controls the length of the geohash that is
indexed when the <<geohash,`geohash`>> option is enabled, and the maximum
geohash length when the <<geohash-prefix,`geohash_prefix`>> option is enabled.

It accepts:

* a number between 1 and 12 (default), which represents the length of the geohash.
* a <<distance-units,distance>>, e.g. `1km`.

If a distance is specified, it will be translated to the smallest
geohash-length that will provide the requested resolution.

For example:

[source,js]
--------------------------------------------------
PUT my_index
{
  "mappings": {
    "my_type": {
      "properties": {
        "location": {
          "type": "geo_point",
          "geohash_prefix": true,
          "geohash_precision": 6 <1>
        }
      }
    }
  }
}

PUT my_index/my_type/1
{
  "location": {
    "lat": 41.12,
    "lon": -71.34
  }
}

GET my_index/_search?fielddata_fields=location.geohash
{
  "query": {
    "term": {
      "location.geohash": "drm3bt"
    }
  }
}

--------------------------------------------------
// AUTOSENSE
<1> A `geohash_precision` of 6 equates to geohash cells of approximately 1.26km x 0.6km
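
Alternatively, as a sketch of the distance form described above (a separate
index is used here purely for illustration), the same field could request a
resolution rather than an explicit length:

[source,js]
--------------------------------------------------
PUT my_index2
{
  "mappings": {
    "my_type": {
      "properties": {
        "location": {
          "type": "geo_point",
          "geohash_prefix": true,
          "geohash_precision": "1km" <1>
        }
      }
    }
  }
}
--------------------------------------------------
// AUTOSENSE
<1> The `1km` resolution is translated to the smallest geohash length that provides at least that resolution.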

@@ -0,0 +1,64 @@
[[geohash-prefix]]
=== `geohash_prefix`

Geohashes are a form of lat/lon encoding which divides the earth up into
a grid. Each cell in this grid is represented by a geohash string. Each
cell in turn can be further subdivided into smaller cells which are
represented by a longer string. So the longer the geohash, the smaller
(and thus more accurate) the cell is.

While the <<geohash,`geohash`>> option enables indexing the geohash that
corresponds to the lat/lon point, at the specified
<<geohash-precision,precision>>, the `geohash_prefix` option will also
index all the enclosing cells.

For instance, a geohash of `drm3btev3e86` will index all of the following
terms: [ `d`, `dr`, `drm`, `drm3`, `drm3b`, `drm3bt`, `drm3bte`, `drm3btev`,
`drm3btev3`, `drm3btev3e`, `drm3btev3e8`, `drm3btev3e86` ].

The geohash prefixes can be used with the
<<query-dsl-geohash-cell-query,`geohash_cell` query>> to find points within a
particular geohash, or its neighbours:

[source,js]
--------------------------------------------------
PUT my_index
{
  "mappings": {
    "my_type": {
      "properties": {
        "location": {
          "type": "geo_point",
          "geohash_prefix": true,
          "geohash_precision": 6
        }
      }
    }
  }
}

PUT my_index/my_type/1
{
  "location": {
    "lat": 41.12,
    "lon": -71.34
  }
}

GET my_index/_search?fielddata_fields=location.geohash
{
  "query": {
    "geohash_cell": {
      "location": {
        "lat": 41.02,
        "lon": -71.48
      },
      "precision": 4, <1>
      "neighbors": true <2>
    }
  }
}
--------------------------------------------------
// AUTOSENSE
<1> Search within the geohash cell of length 4 that contains the specified point.
<2> Also match points in the neighbouring cells.

@@ -0,0 +1,70 @@
[[geohash]]
=== `geohash`

Geohashes are a form of lat/lon encoding which divides the earth up into
a grid. Each cell in this grid is represented by a geohash string. Each
cell in turn can be further subdivided into smaller cells which are
represented by a longer string. So the longer the geohash, the smaller
(and thus more accurate) the cell is.

Because geohashes are just strings, they can be stored in an inverted
index like any other string, which makes querying them very efficient.

If you enable the `geohash` option, a `geohash` ``sub-field'' will be indexed
as, eg, `.geohash`. The length of the geohash is controlled by the
<<geohash-precision,`geohash_precision`>> parameter.

If the <<geohash-prefix,`geohash_prefix`>> option is enabled, the `geohash`
option will be enabled automatically.

For example:

[source,js]
--------------------------------------------------
PUT my_index
{
  "mappings": {
    "my_type": {
      "properties": {
        "location": {
          "type": "geo_point", <1>
          "geohash": true
        }
      }
    }
  }
}

PUT my_index/my_type/1
{
  "location": {
    "lat": 41.12,
    "lon": -71.34
  }
}

GET my_index/_search?fielddata_fields=location.geohash <2>
{
  "query": {
    "prefix": {
      "location.geohash": "drm3b" <3>
    }
  }
}
--------------------------------------------------
// AUTOSENSE
<1> A `location.geohash` field will be indexed for each geo-point.
<2> The geohash can be retrieved with <<doc-values,`doc_values`>>.
<3> A <<query-dsl-prefix-query,`prefix`>> query can find all geohashes which start with a particular prefix.

[WARNING]
============================================

A `prefix` query on geohashes is expensive. Instead, consider using the
<<geohash-prefix,`geohash_prefix`>> option to pay the expense once at index time
instead of on every query.

============================================

@@ -0,0 +1,61 @@
[[ignore-above]]
=== `ignore_above`

Strings longer than the `ignore_above` setting will not be processed by the
<<analyzer,analyzer>> and will not be indexed. This is mainly useful for
<<mapping-index,`not_analyzed`>> string fields, which are typically used for
filtering, aggregations, and sorting. These are structured fields and it
doesn't usually make sense to allow very long terms to be indexed in these
fields.

[source,js]
--------------------------------------------------
PUT my_index
{
  "mappings": {
    "my_type": {
      "properties": {
        "message": {
          "type": "string",
          "index": "not_analyzed",
          "ignore_above": 20 <1>
        }
      }
    }
  }
}

PUT my_index/my_type/1 <2>
{
  "message": "Syntax error"
}

PUT my_index/my_type/2 <3>
{
  "message": "Syntax error with some long stacktrace"
}

GET _search <4>
{
  "aggs": {
    "messages": {
      "terms": {
        "field": "message"
      }
    }
  }
}
--------------------------------------------------
// AUTOSENSE
<1> This field will ignore any string longer than 20 characters.
<2> This document is indexed successfully.
<3> This document will be indexed, but without indexing the `message` field.
<4> Search returns both documents, but only the first is present in the terms aggregation.

This option is also useful for protecting against Lucene's term byte-length
limit of `32766`.

NOTE: The value for `ignore_above` is the _character count_, but Lucene counts
bytes. If you use UTF-8 text with many non-ASCII characters, you may want to
set the limit to `32766 / 3 = 10922` since UTF-8 characters may occupy at most
3 bytes.

@@ -0,0 +1,83 @@
[[ignore-malformed]]
=== `ignore_malformed`

Sometimes you don't have much control over the data that you receive. One
user may send a `login` field that is a <<date,`date`>>, and another sends a
`login` field that is an email address.

Trying to index the wrong datatype into a field throws an exception by
default, and rejects the whole document. The `ignore_malformed` parameter, if
set to `true`, allows the exception to be ignored. The malformed field is not
indexed, but other fields in the document are processed normally.

For example:

[source,js]
--------------------------------------------------
PUT my_index
{
  "mappings": {
    "my_type": {
      "properties": {
        "number_one": {
          "type": "integer"
        },
        "number_two": {
          "type": "integer",
          "ignore_malformed": true
        }
      }
    }
  }
}

PUT my_index/my_type/1
{
  "text": "Some text value",
  "number_one": "foo" <1>
}

PUT my_index/my_type/2
{
  "text": "Some text value",
  "number_two": "foo" <2>
}
--------------------------------------------------
// AUTOSENSE
<1> This document will be rejected because `number_one` does not allow malformed values.
<2> This document will have the `text` field indexed, but not the `number_two` field.

[[ignore-malformed-setting]]
==== Index-level default

The `index.mapping.ignore_malformed` setting can be set on the index level to
ignore malformed content globally across all mapping types.

[source,js]
--------------------------------------------------
PUT my_index
{
  "settings": {
    "index.mapping.ignore_malformed": true <1>
  },
  "mappings": {
    "my_type": {
      "properties": {
        "number_one": { <1>
          "type": "byte"
        },
        "number_two": {
          "type": "integer",
          "ignore_malformed": false <2>
        }
      }
    }
  }
}
--------------------------------------------------
// AUTOSENSE

<1> The `number_one` field inherits the index-level setting.
<2> The `number_two` field overrides the index-level setting to turn off `ignore_malformed`.

@@ -0,0 +1,83 @@
[[include-in-all]]
=== `include_in_all`

The `include_in_all` parameter provides per-field control over which fields
are included in the <<mapping-all-field,`_all`>> field. It defaults to `true`, unless <<mapping-index,`index`>> is set to `no`.

This example demonstrates how to exclude the `date` field from the `_all` field:

[source,js]
--------------------------------
PUT my_index
{
  "mappings": {
    "my_type": {
      "properties": {
        "title": { <1>
          "type": "string"
        },
        "content": { <1>
          "type": "string"
        },
        "date": { <2>
          "type": "date",
          "include_in_all": false
        }
      }
    }
  }
}
--------------------------------
// AUTOSENSE

<1> The `title` and `content` fields will be included in the `_all` field.
<2> The `date` field will not be included in the `_all` field.

The `include_in_all` parameter can also be set at the type level and on
<<object,`object`>> or <<nested,`nested`>> fields, in which case all
sub-fields inherit that setting. For instance:

[source,js]
--------------------------------
PUT my_index
{
  "mappings": {
    "my_type": {
      "include_in_all": false, <1>
      "properties": {
        "title": { "type": "string" },
        "author": {
          "include_in_all": true, <2>
          "properties": {
            "first_name": { "type": "string" },
            "last_name": { "type": "string" }
          }
        },
        "editor": {
          "properties": {
            "first_name": { "type": "string" }, <3>
            "last_name": { "type": "string", "include_in_all": true } <3>
          }
        }
      }
    }
  }
}
--------------------------------
// AUTOSENSE

<1> All fields in `my_type` are excluded from `_all`.
<2> The `author.first_name` and `author.last_name` fields are included in `_all`.
<3> Only the `editor.last_name` field is included in `_all`.
The `editor.first_name` inherits the type-level setting and is excluded.

[NOTE]
.Multi-fields and `include_in_all`
=================================

The original field value is added to the `_all` field, not the terms produced
by a field's analyzer. For this reason, it makes no sense to set
`include_in_all` to `true` on <<multi-fields,multi-fields>>, as each
multi-field has exactly the same value as its parent.

=================================

@@ -0,0 +1,70 @@
[[index-options]]
=== `index_options`

The `index_options` parameter controls what information is added to the
inverted index, for search and highlighting purposes. It accepts the
following settings:

[horizontal]
`docs`::

    Only the doc number is indexed. Can answer the question _Does this term
    exist in this field?_

`freqs`::

    Doc number and term frequencies are indexed. Term frequencies are used to
    score repeated terms higher than single terms.

`positions`::

    Doc number, term frequencies, and term positions (or order) are indexed.
    Positions can be used for
    <<query-dsl-match-query-phrase,proximity or phrase queries>>.

`offsets`::

    Doc number, term frequencies, positions, and start and end character
    offsets (which map the term back to the original string) are indexed.
    Offsets are used by the <<postings-highlighter,postings highlighter>>.

<<mapping-index,Analyzed>> string fields use `positions` as the default, and
all other fields use `docs` as the default.

[source,js]
--------------------------------------------------
PUT my_index
{
  "mappings": {
    "my_type": {
      "properties": {
        "text": {
          "type": "string",
          "index_options": "offsets"
        }
      }
    }
  }
}

PUT my_index/my_type/1
{
  "text": "Quick brown fox"
}

GET my_index/_search
{
  "query": {
    "match": {
      "text": "brown fox"
    }
  },
  "highlight": {
    "fields": {
      "text": {} <1>
    }
  }
}
--------------------------------------------------
// AUTOSENSE
<1> The `text` field will use the postings highlighter by default because `offsets` are indexed.

@@ -0,0 +1,48 @@
[[mapping-index]]
=== `index`

The `index` option controls how field values are indexed and, thus, how they
are searchable. It accepts three values:

[horizontal]
`no`::

    Do not add this field value to the index. With this setting, the field
    will not be queryable.

`not_analyzed`::

    Add the field value to the index unchanged, as a single term. This is the
    default for all fields that support this option except for
    <<string,`string`>> fields. `not_analyzed` fields are usually used with
    <<term-level-queries,term-level queries>> for structured search.

`analyzed`::

    This option applies only to `string` fields, for which it is the default.
    The string field value is first <<analysis,analyzed>> to convert the
    string into terms (e.g. a list of individual words), which are then
    indexed. At search time, the query string is passed through
    (<<search-analyzer,usually>>) the same analyzer to generate terms
    in the same format as those in the index. It is this process that enables
    <<full-text-queries,full text search>>.

For example, you can create a `not_analyzed` string field with the following:

[source,js]
--------------------------------------------------
PUT /my_index
{
  "mappings": {
    "my_type": {
      "properties": {
        "status_code": {
          "type": "string",
          "index": "not_analyzed"
        }
      }
    }
  }
}
--------------------------------------------------
// AUTOSENSE

@@ -0,0 +1,63 @@
[[lat-lon]]
=== `lat_lon`

<<geo-queries,Geo-queries>> are usually performed by plugging the value of
each <<geo-point,`geo_point`>> field into a formula to determine whether it
falls into the required area or not. Unlike most queries, the inverted index
is not involved.

Setting `lat_lon` to `true` causes the latitude and longitude values to be
indexed as numeric fields (called `.lat` and `.lon`). These fields can be used
by the <<query-dsl-geo-bounding-box-query,`geo_bounding_box`>> and
<<query-dsl-geo-distance-query,`geo_distance`>> queries instead of
performing in-memory calculations.

[source,js]
--------------------------------------------------
PUT my_index
{
  "mappings": {
    "my_type": {
      "properties": {
        "location": {
          "type": "geo_point",
          "lat_lon": true <1>
        }
      }
    }
  }
}

PUT my_index/my_type/1
{
  "location": {
    "lat": 41.12,
    "lon": -71.34
  }
}

GET my_index/_search
{
  "query": {
    "geo_distance": {
      "location": {
        "lat": 41,
        "lon": -71
      },
      "distance": "50km",
      "optimize_bbox": "indexed" <2>
    }
  }
}
--------------------------------------------------
// AUTOSENSE
<1> Setting `lat_lon` to true indexes the geo-point in the `location.lat` and `location.lon` fields.
<2> The `indexed` option tells the geo-distance query to use the inverted index instead of the in-memory calculation.

Whether the in-memory or indexed operation performs better depends both on
your dataset and on the types of queries that you are running.

NOTE: The `lat_lon` option only makes sense for single-value `geo_point`
fields. It will not work with arrays of geo-points.

@@ -0,0 +1,132 @@
[[multi-fields]]
=== `fields`

It is often useful to index the same field in different ways for different
purposes. This is the purpose of _multi-fields_. For instance, a `string`
field could be <<mapping-index,indexed>> as an `analyzed` field for full-text
search, and as a `not_analyzed` field for sorting or aggregations:

[source,js]
--------------------------------------------------
PUT /my_index
{
  "mappings": {
    "my_type": {
      "properties": {
        "city": {
          "type": "string",
          "fields": {
            "raw": { <1>
              "type": "string",
              "index": "not_analyzed"
            }
          }
        }
      }
    }
  }
}

PUT /my_index/my_type/1
{
  "city": "New York"
}

PUT /my_index/my_type/2
{
  "city": "York"
}

GET /my_index/_search
{
  "query": {
    "match": {
      "city": "york" <2>
    }
  },
  "sort": {
    "city.raw": "asc" <3>
  },
  "aggs": {
    "Cities": {
      "terms": {
        "field": "city.raw" <3>
      }
    }
  }
}
--------------------------------------------------
// AUTOSENSE
<1> The `city.raw` field is a `not_analyzed` version of the `city` field.
<2> The analyzed `city` field can be used for full text search.
<3> The `city.raw` field can be used for sorting and aggregations.

NOTE: Multi-fields do not change the original `_source` field.

==== Multi-fields with multiple analyzers

Another use case of multi-fields is to analyze the same field in different
ways for better relevance. For instance we could index a field with the
<<analysis-standard-analyzer,`standard` analyzer>> which breaks text up into
words, and again with the <<english-analyzer,`english` analyzer>>
which stems words into their root form:

[source,js]
--------------------------------------------------
PUT my_index
{
  "mappings": {
    "my_type": {
      "properties": {
        "text": { <1>
          "type": "string",
          "fields": {
            "english": { <2>
              "type": "string",
              "analyzer": "english"
            }
          }
        }
      }
    }
  }
}

PUT my_index/my_type/1
{ "text": "quick brown fox" } <3>

PUT my_index/my_type/2
{ "text": "quick brown foxes" } <3>

GET my_index/_search
{
  "query": {
    "multi_match": {
      "query": "quick brown foxes",
      "fields": [ <4>
        "text",
        "text.english"
      ],
      "type": "most_fields" <4>
    }
  }
}
--------------------------------------------------
// AUTOSENSE

<1> The `text` field uses the `standard` analyzer.
<2> The `text.english` field uses the `english` analyzer.
<3> Index two documents, one with `fox` and the other with `foxes`.
<4> Query both the `text` and `text.english` fields and combine the scores.

The `text` field contains the term `fox` in the first document and `foxes` in
the second document. The `text.english` field contains `fox` for both
documents, because `foxes` is stemmed to `fox`.

The query string is also analyzed by the `standard` analyzer for the `text`
field, and by the `english` analyzer for the `text.english` field. The
stemmed field allows a query for `foxes` to also match the document containing
just `fox`. This allows us to match as many documents as possible. By also
querying the unstemmed `text` field, we improve the relevance score of the
document which matches `foxes` exactly.

@@ -0,0 +1,64 @@
[[norms]]
=== `norms`

Norms store various normalization factors -- a number to represent the
relative field length and the <<index-boost,index time `boost`>> setting --
that are later used at query time in order to compute the score of a document
relative to a query.

Although useful for scoring, norms also require quite a lot of memory
(typically in the order of one byte per document per field in your index, even
for documents that don't have this specific field). As a consequence, if you
don't need scoring on a specific field, you should disable norms on that
field. In particular, this is the case for fields that are used solely for
filtering or aggregations.

Norms can be disabled (but not reenabled) after the fact, using the
<<indices-put-mapping,PUT mapping API>> like so:

[source,js]
------------
PUT my_index/_mapping/my_type
{
  "properties": {
    "title": {
      "type": "string",
      "norms": {
        "enabled": false
      }
    }
  }
}
------------
// AUTOSENSE

NOTE: Norms will not be removed instantly, but will be removed as old segments
are merged into new segments as you continue indexing new documents. Any score
computation on a field that has had norms removed might return inconsistent
results since some documents won't have norms anymore while other documents
might still have norms.

==== Lazy loading of norms

Norms can be loaded into memory eagerly (`eager`), whenever a new segment
comes online, or they can be loaded lazily (`lazy`, default), only when the
field is queried.

Eager loading can be configured as follows:

[source,js]
------------
PUT my_index/_mapping/my_type
{
  "properties": {
    "title": {
      "type": "string",
      "norms": {
        "loading": "eager"
      }
    }
  }
}
------------
// AUTOSENSE

@@ -0,0 +1,58 @@
[[null-value]]
=== `null_value`

A `null` value cannot be indexed or searched. When a field is set to `null`
(or an empty array or an array of `null` values), it is treated as though that
field has no values.

The `null_value` parameter allows you to replace explicit `null` values with
the specified value so that it can be indexed and searched. For instance:

[source,js]
--------------------------------------------------
PUT my_index
{
  "mappings": {
    "my_type": {
      "properties": {
        "status_code": {
          "type": "string",
          "index": "not_analyzed",
          "null_value": "NULL" <1>
        }
      }
    }
  }
}

PUT my_index/my_type/1
{
  "status_code": null
}

PUT my_index/my_type/2
{
  "status_code": [] <2>
}

GET my_index/_search
{
  "query": {
    "term": {
      "status_code": "NULL" <3>
    }
  }
}
--------------------------------------------------
// AUTOSENSE
<1> Replace explicit `null` values with the term `NULL`.
<2> An empty array does not contain an explicit `null`, and so won't be replaced with the `null_value`.
<3> A query for `NULL` returns document 1, but not document 2.

IMPORTANT: The `null_value` needs to be the same datatype as the field. For
instance, a `long` field cannot have a string `null_value`. String fields
which are `analyzed` will also pass the `null_value` through the configured
analyzer.

Also see the <<query-dsl-missing-query,`missing` query>> for its `null_value` support.

@@ -0,0 +1,68 @@
[[position-offset-gap]]
=== `position_offset_gap`

<<mapping-index,Analyzed>> string fields take term <<index-options,positions>>
into account, in order to be able to support
<<query-dsl-match-query-phrase,proximity or phrase queries>>.
When indexing an array of strings, each string of the array is indexed
directly after the previous one, almost as though all the strings in the array
had been concatenated into one big string.

This can result in matches from phrase queries spanning two array elements.
For instance:

[source,js]
--------------------------------------------------
PUT /my_index/groups/1
{
  "names": [ "John Abraham", "Lincoln Smith" ]
}

GET /my_index/groups/_search
{
  "query": {
    "match_phrase": {
      "names": "Abraham Lincoln" <1>
    }
  }
}
--------------------------------------------------
// AUTOSENSE
<1> This phrase query matches our document, even though `Abraham` and `Lincoln` are in separate strings.

The `position_offset_gap` can introduce a fake gap between each array element. For instance:

[source,js]
--------------------------------------------------
PUT my_index
{
  "mappings": {
    "groups": {
      "properties": {
        "names": {
          "type": "string",
          "position_offset_gap": 50 <1>
        }
      }
    }
  }
}

PUT /my_index/groups/1
{
  "names": [ "John Abraham", "Lincoln Smith" ]
}

GET /my_index/groups/_search
{
  "query": {
    "match_phrase": {
      "names": "Abraham Lincoln" <2>
    }
  }
}
--------------------------------------------------
// AUTOSENSE
<1> The first term in the next array element will be 50 terms apart from the
last term in the previous array element.
<2> The phrase query no longer matches our document.

@@ -0,0 +1,56 @@
[[precision-step]]
=== `precision_step`

Most <<number,numeric>> datatypes index extra terms representing numeric
ranges for each number to make <<query-dsl-range-query,`range` queries>>
faster. For instance, this `range` query:

[source,js]
--------------------------------------------------
"range": {
  "number": {
    "gte": 0,
    "lte": 321
  }
}
--------------------------------------------------

might be executed internally as a <<query-dsl-terms-query,`terms` query>> that
looks something like this:

[source,js]
--------------------------------------------------
"terms": {
  "number": [
    "0-255",
    "256-319",
    "320",
    "321"
  ]
}
--------------------------------------------------

These extra terms greatly reduce the number of terms that have to be examined,
at the cost of increased disk space.

The default value for `precision_step` depends on the `type` of the numeric field:

[horizontal]
`long`, `double`, `date`, `ip`:: `16` (3 extra terms)
`integer`, `float`, `short`:: `8` (3 extra terms)
`byte`:: `2147483647` (0 extra terms)
`token_count`:: `32` (0 extra terms)

The value of the `precision_step` setting indicates the number of bits that
should be compressed into an extra term. A `long` value consists of 64 bits,
so a `precision_step` of 16 results in the following terms:

[horizontal]
Bits 0-15:: `value & 1111111111111111 0000000000000000 0000000000000000 0000000000000000`
Bits 0-31:: `value & 1111111111111111 1111111111111111 0000000000000000 0000000000000000`
Bits 0-47:: `value & 1111111111111111 1111111111111111 1111111111111111 0000000000000000`
Bits 0-63:: `value`
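
The `precision_step` can be set per field in the mapping. A minimal sketch
(the index and field names are illustrative; a smaller `precision_step`
indexes more terms per value):

[source,js]
--------------------------------------------------
PUT my_index
{
  "mappings": {
    "my_type": {
      "properties": {
        "number": {
          "type": "long",
          "precision_step": 8 <1>
        }
      }
    }
  }
}
--------------------------------------------------
// AUTOSENSE
<1> Each `number` value produces terms at 8-bit steps (eight terms for a 64-bit `long`) instead of the default four.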

@@ -0,0 +1,101 @@
[[properties]]
=== `properties`

Type mappings, <<object,`object` fields>> and <<nested,`nested` fields>>
contain sub-fields, called `properties`. These properties may be of any
<<mapping-types,datatype>>, including `object` and `nested`. Properties can
be added:

* explicitly by defining them when <<indices-create-index,creating an index>>.
* explicitly by defining them when adding or updating a mapping type with the <<indices-put-mapping,PUT mapping>> API (a sketch of this is shown below).
* <<dynamic-mapping,dynamically>> just by indexing documents containing new fields.

Below is an example of adding `properties` to a mapping type, an `object`
field, and a `nested` field:

[source,js]
--------------------------------------------------
PUT my_index
{
  "mappings": {
    "my_type": { <1>
      "properties": {
        "manager": { <2>
          "properties": {
            "age": { "type": "integer" },
            "name": { "type": "string" }
          }
        },
        "employees": { <3>
          "type": "nested",
          "properties": {
            "age": { "type": "integer" },
            "name": { "type": "string" }
          }
        }
      }
    }
  }
}

PUT my_index/my_type/1 <4>
{
  "region": "US",
  "manager": {
    "name": "Alice White",
    "age": 30
  },
  "employees": [
    {
      "name": "John Smith",
      "age": 34
    },
    {
      "name": "Peter Brown",
      "age": 26
    }
  ]
}
--------------------------------------------------
// AUTOSENSE
<1> Properties under the `my_type` mapping type.
<2> Properties under the `manager` object field.
<3> Properties under the `employees` nested field.
<4> An example document which corresponds to the above mapping.
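
For the second case, a minimal sketch of adding a new property to the existing
mapping type with the PUT mapping API (the `employee_count` field is purely
illustrative):

[source,js]
--------------------------------------------------
PUT my_index/_mapping/my_type
{
  "properties": {
    "employee_count": { <1>
      "type": "integer"
    }
  }
}
--------------------------------------------------
// AUTOSENSE
<1> Adds a new `employee_count` property to `my_type`; the existing properties are left untouched.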

==== Dot notation

Inner fields can be referred to in queries, aggregations, etc., using _dot
notation_:

[source,js]
--------------------------------------------------
GET my_index/_search
{
  "query": {
    "match": {
      "manager.name": "Alice White" <1>
    }
  },
  "aggs": {
    "Employees": {
      "nested": {
        "path": "employees"
      },
      "aggs": {
        "Employee Ages": {
          "histogram": {
            "field": "employees.age", <2>
            "interval": 5
          }
        }
      }
    }
  }
}
--------------------------------------------------
// AUTOSENSE
<1> The `manager.name` inner field is referred to by its full path.
<2> The `employees.age` inner field is referred to by its full path.

IMPORTANT: The full path to the inner field must be specified.

@@ -0,0 +1,79 @@
[[search-analyzer]]
=== `search_analyzer`

Usually, the same <<analyzer,analyzer>> should be applied at index time and at
search time, to ensure that the terms in the query are in the same format as
the terms in the inverted index.

Sometimes, though, it can make sense to use a different analyzer at search
time, such as when using the <<analysis-edgengram-tokenizer,`edge_ngram`>>
tokenizer for autocomplete.

By default, queries will use the `analyzer` defined in the field mapping, but
this can be overridden with the `search_analyzer` setting:

[source,js]
--------------------------------------------------
PUT /my_index
{
  "settings": {
    "analysis": {
      "filter": {
        "autocomplete_filter": {
          "type": "edge_ngram",
          "min_gram": 1,
          "max_gram": 20
        }
      },
      "analyzer": {
        "autocomplete": { <1>
          "type": "custom",
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "autocomplete_filter"
          ]
        }
      }
    }
  },
  "mappings": {
    "my_type": {
      "properties": {
        "text": {
          "type": "string",
          "analyzer": "autocomplete", <2>
          "search_analyzer": "standard" <2>
        }
      }
    }
  }
}

PUT my_index/my_type/1
{
  "text": "Quick Brown Fox" <3>
}

GET my_index/_search
{
  "query": {
    "match": {
      "text": {
        "query": "Quick Br", <4>
        "operator": "and"
      }
    }
  }
}

--------------------------------------------------
// AUTOSENSE

<1> Analysis settings to define the custom `autocomplete` analyzer.
<2> The `text` field uses the `autocomplete` analyzer at index time, but the `standard` analyzer at search time.
<3> This field is indexed as the terms: [ `q`, `qu`, `qui`, `quic`, `quick`, `b`, `br`, `bro`, `brow`, `brown`, `f`, `fo`, `fox` ]
<4> The query searches for both of these terms: [ `quick`, `br` ]

See {defguide}/_index_time_search_as_you_type.html[Index time search-as-you-type]
for a full explanation of this example.

@@ -0,0 +1,54 @@
[[similarity]]
=== `similarity`

Elasticsearch allows you to configure a scoring algorithm or _similarity_ per
field. The `similarity` setting provides a simple way of choosing a similarity
algorithm other than the default TF/IDF, such as `BM25`.

Similarities are mostly useful for <<string,`string`>> fields, especially
`analyzed` string fields, but can also apply to other field types.

Custom similarities can be configured by tuning the parameters of the built-in
similarities. For more details about these expert options, see the
<<index-modules-similarity,similarity module>>.

The only similarities which can be used out of the box, without any further
configuration, are:

`default`::
    The default TF/IDF algorithm used by Elasticsearch and
    Lucene. See {defguide}/practical-scoring-function.html[Lucene’s Practical Scoring Function]
    for more information.

`BM25`::
    The Okapi BM25 algorithm.
    See {defguide}/pluggable-similarites.html[Pluggable Similarity Algorithms]
    for more information.

The `similarity` can be set on the field level when a field is first created,
as follows:

[source,js]
--------------------------------------------------
PUT my_index
{
  "mappings": {
    "my_type": {
      "properties": {
        "default_field": { <1>
          "type": "string"
        },
        "bm25_field": {
          "type": "string",
          "similarity": "BM25" <2>
        }
      }
    }
  }
}
--------------------------------------------------
// AUTOSENSE
<1> The `default_field` uses the `default` similarity (ie TF/IDF).
<2> The `bm25_field` uses the `BM25` similarity.

@@ -0,0 +1,73 @@
[[mapping-store]]
=== `store`

By default, field values are <<mapping-index,indexed>> to make them searchable,
but they are not _stored_. This means that the field can be queried, but the
original field value cannot be retrieved.

Usually this doesn't matter. The field value is already part of the
<<mapping-source-field,`_source` field>>, which is stored by default. If you
only want to retrieve the value of a single field or of a few fields, instead
of the whole `_source`, then this can be achieved with
<<search-request-source-filtering,source filtering>>.

In certain situations it can make sense to `store` a field. For instance, if
you have a document with a `title`, a `date`, and a very large `content`
field, you may want to retrieve just the `title` and the `date` without having
to extract those fields from a large `_source` field:

[source,js]
--------------------------------------------------
PUT /my_index
{
  "mappings": {
    "my_type": {
      "properties": {
        "title": {
          "type": "string",
          "store": true <1>
        },
        "date": {
          "type": "date",
          "store": true <1>
        },
        "content": {
          "type": "string"
        }
      }
    }
  }
}

PUT /my_index/my_type/1
{
  "title": "Some short title",
  "date": "2015-01-01",
  "content": "A very long content field..."
}

GET my_index/_search
{
  "fields": [ "title", "date" ] <2>
}
--------------------------------------------------
// AUTOSENSE
<1> The `title` and `date` fields are stored.
<2> This request will retrieve the values of the `title` and `date` fields.

[NOTE]
.Stored fields returned as arrays
======================================

For consistency, stored fields are always returned as an _array_ because there
is no way of knowing if the original field value was a single value, multiple
values, or an empty array.

If you need the original value, you should retrieve it from the `_source`
field instead.

======================================

Another situation where it can make sense to make a field stored is for those
that don't appear in the `_source` field (such as <<copy-to,`copy_to` fields>>).
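
As a sketch of that last case (the index and field names are illustrative), a
<<copy-to,`copy_to`>> target can be stored so that its combined value is
retrievable even though it never appears in `_source`:

[source,js]
--------------------------------------------------
PUT /my_index2
{
  "mappings": {
    "my_type": {
      "properties": {
        "first_name": {
          "type": "string",
          "copy_to": "full_name"
        },
        "last_name": {
          "type": "string",
          "copy_to": "full_name"
        },
        "full_name": {
          "type": "string",
          "store": true <1>
        }
      }
    }
  }
}
--------------------------------------------------
// AUTOSENSE
<1> `full_name` is populated from `first_name` and `last_name` at index time and, because it is stored, can be returned with `"fields": [ "full_name" ]` even though it is not part of `_source`.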