commit d1a5068b77
Merge branch 'master' into integ_randomization
@@ -14,7 +14,7 @@ docs/html/
 docs/build.log
 /tmp/
 backwards/
+html_docs
 ## eclipse ignores (use 'mvn eclipse:eclipse' to build eclipse projects)
 ## All files (.project, .classpath, .settings/*) should be generated through Maven which
 ## will correctly set the classpath based on the declared dependencies and write settings
@@ -33,6 +33,7 @@ import org.apache.lucene.search.Query;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.InPlaceMergeSorter;
+import org.apache.lucene.util.ToStringUtils;
 
 import java.io.IOException;
 import java.util.Arrays;
@@ -62,13 +63,17 @@ import java.util.List;
 public abstract class BlendedTermQuery extends Query {
 
     private final Term[] terms;
+    private final float[] boosts;
 
-    public BlendedTermQuery(Term[] terms) {
+    public BlendedTermQuery(Term[] terms, float[] boosts) {
         if (terms == null || terms.length == 0) {
             throw new IllegalArgumentException("terms must not be null or empty");
         }
+        if (boosts != null && boosts.length != terms.length) {
+            throw new IllegalArgumentException("boosts must have the same size as terms");
+        }
         this.terms = terms;
+        this.boosts = boosts;
     }
 
     @Override
@@ -231,8 +236,22 @@ public abstract class BlendedTermQuery extends Query {
 
     @Override
     public String toString(String field) {
-        return "blended(terms: " + Arrays.toString(terms) + ")";
+        StringBuilder builder = new StringBuilder("blended(terms:[");
+        for (int i = 0; i < terms.length; ++i) {
+            builder.append(terms[i]);
+            float boost = 1f;
+            if (boosts != null) {
+                boost = boosts[i];
+            }
+            builder.append(ToStringUtils.boost(boost));
+            builder.append(", ");
+        }
+        if (terms.length > 0) {
+            builder.setLength(builder.length() - 2);
+        }
+        builder.append("])");
+        builder.append(ToStringUtils.boost(getBoost()));
+        return builder.toString();
     }
 
     private volatile Term[] equalTerms = null;
@@ -277,7 +296,7 @@ public abstract class BlendedTermQuery extends Query {
     }
 
     public static BlendedTermQuery booleanBlendedQuery(Term[] terms, final float[] boosts, final boolean disableCoord) {
-        return new BlendedTermQuery(terms) {
+        return new BlendedTermQuery(terms, boosts) {
             @Override
             protected Query topLevelQuery(Term[] terms, TermContext[] ctx, int[] docFreqs, int maxDoc) {
                 BooleanQuery query = new BooleanQuery(disableCoord);
@@ -294,7 +313,7 @@ public abstract class BlendedTermQuery extends Query {
     }
 
     public static BlendedTermQuery commonTermsBlendedQuery(Term[] terms, final float[] boosts, final boolean disableCoord, final float maxTermFrequency) {
-        return new BlendedTermQuery(terms) {
+        return new BlendedTermQuery(terms, boosts) {
             @Override
             protected Query topLevelQuery(Term[] terms, TermContext[] ctx, int[] docFreqs, int maxDoc) {
                 BooleanQuery query = new BooleanQuery(true);
@@ -334,7 +353,7 @@ public abstract class BlendedTermQuery extends Query {
     }
 
     public static BlendedTermQuery dismaxBlendedQuery(Term[] terms, final float[] boosts, final float tieBreakerMultiplier) {
-        return new BlendedTermQuery(terms) {
+        return new BlendedTermQuery(terms, boosts) {
             @Override
             protected Query topLevelQuery(Term[] terms, TermContext[] ctx, int[] docFreqs, int maxDoc) {
                 DisjunctionMaxQuery query = new DisjunctionMaxQuery(tieBreakerMultiplier);
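Note (editor): with this change the per-term boosts travel with the BlendedTermQuery itself and show up in its string representation. A minimal usage sketch, not part of the commit — the class name of the example, the field names and the import location of BlendedTermQuery are assumptions:

    import org.apache.lucene.index.Term;
    import org.apache.lucene.search.Query;
    // import for BlendedTermQuery omitted; use whatever package the class above lives in

    public class BlendedTermQueryExample {
        public static void main(String[] args) {
            // two fields, with the last_name match weighted twice as strongly
            Term[] terms = new Term[] { new Term("first_name", "smith"), new Term("last_name", "smith") };
            float[] boosts = new float[] { 1.0f, 2.0f };
            Query q = BlendedTermQuery.dismaxBlendedQuery(terms, boosts, 0.1f);
            // with the new toString(), this prints something like:
            // blended(terms:[first_name:smith, last_name:smith^2.0])
            System.out.println(q.toString("first_name"));
        }
    }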
@@ -272,13 +272,13 @@ public class TransportClusterHealthAction extends TransportMasterNodeReadAction<
         } catch (IndexNotFoundException e) {
             // one of the specified indices is not there - treat it as RED.
             ClusterHealthResponse response = new ClusterHealthResponse(clusterName.value(), Strings.EMPTY_ARRAY, clusterState,
-                    numberOfPendingTasks, numberOfInFlightFetch, UnassignedInfo.getNumberOfDelayedUnassigned(settings, clusterState),
+                    numberOfPendingTasks, numberOfInFlightFetch, UnassignedInfo.getNumberOfDelayedUnassigned(System.currentTimeMillis(), settings, clusterState),
                     pendingTaskTimeInQueue);
             response.status = ClusterHealthStatus.RED;
             return response;
         }
 
         return new ClusterHealthResponse(clusterName.value(), concreteIndices, clusterState, numberOfPendingTasks,
-                numberOfInFlightFetch, UnassignedInfo.getNumberOfDelayedUnassigned(settings, clusterState), pendingTaskTimeInQueue);
+                numberOfInFlightFetch, UnassignedInfo.getNumberOfDelayedUnassigned(System.currentTimeMillis(), settings, clusterState), pendingTaskTimeInQueue);
     }
 }
@@ -59,7 +59,7 @@ final class JVMCheck {
         /** Returns an error message to the user for a broken version */
         String getErrorMessage() {
             StringBuilder sb = new StringBuilder();
-            sb.append("Java version: ").append(Constants.JAVA_VERSION);
+            sb.append("Java version: ").append(fullVersion());
             sb.append(" suffers from critical bug ").append(bugUrl);
             sb.append(" which can cause data corruption.");
             sb.append(System.lineSeparator());
@@ -111,7 +111,7 @@ final class JVMCheck {
      */
     static void check() {
         if (Boolean.parseBoolean(System.getProperty(JVM_BYPASS))) {
-            Loggers.getLogger(JVMCheck.class).warn("bypassing jvm version check for version [{}], this can result in data corruption!", Constants.JAVA_VERSION);
+            Loggers.getLogger(JVMCheck.class).warn("bypassing jvm version check for version [{}], this can result in data corruption!", fullVersion());
         } else if ("Oracle Corporation".equals(Constants.JVM_VENDOR)) {
             HotspotBug bug = JVM_BROKEN_HOTSPOT_VERSIONS.get(Constants.JVM_VERSION);
             if (bug != null) {
@@ -135,11 +135,28 @@ final class JVMCheck {
                 StringBuilder sb = new StringBuilder();
                 sb.append("IBM J9 runtimes < 2.8 suffer from several bugs which can cause data corruption.");
                 sb.append(System.lineSeparator());
-                sb.append("Your version: " + Constants.JVM_VERSION);
+                sb.append("Your version: " + fullVersion());
                 sb.append(System.lineSeparator());
                 sb.append("Please upgrade the JVM to a recent IBM JDK");
                 throw new RuntimeException(sb.toString());
             }
         }
     }
 
+    /**
+     * Returns java + jvm version, looks like this:
+     * {@code Oracle Corporation 1.8.0_45 [Java HotSpot(TM) 64-Bit Server VM 25.45-b02]}
+     */
+    static String fullVersion() {
+        StringBuilder sb = new StringBuilder();
+        sb.append(Constants.JAVA_VENDOR);
+        sb.append(" ");
+        sb.append(Constants.JAVA_VERSION);
+        sb.append(" [");
+        sb.append(Constants.JVM_NAME);
+        sb.append(" ");
+        sb.append(Constants.JVM_VERSION);
+        sb.append("]");
+        return sb.toString();
+    }
 }
@@ -57,6 +57,7 @@ public class RoutingService extends AbstractLifecycleComponent<RoutingService> i
     private AtomicBoolean rerouting = new AtomicBoolean();
     private volatile long registeredNextDelaySetting = Long.MAX_VALUE;
     private volatile ScheduledFuture registeredNextDelayFuture;
+    private volatile long unassignedShardsAllocatedTimestamp = 0;
 
     @Inject
     public RoutingService(Settings settings, ThreadPool threadPool, ClusterService clusterService, AllocationService allocationService) {
@@ -87,6 +88,19 @@ public class RoutingService extends AbstractLifecycleComponent<RoutingService> i
         return this.allocationService;
     }
 
+    /**
+     * Update the last time the allocator tried to assign unassigned shards
+     *
+     * This is used so that both the GatewayAllocator and RoutingService use a
+     * consistent timestamp for comparing which shards have been delayed to
+     * avoid a race condition where GatewayAllocator thinks the shard should
+     * be delayed and the RoutingService thinks it has already passed the delay
+     * and that the GatewayAllocator has/will handle it.
+     */
+    public void setUnassignedShardsAllocatedTimestamp(long timeInMillis) {
+        this.unassignedShardsAllocatedTimestamp = timeInMillis;
+    }
+
     /**
      * Initiates a reroute.
      */
@@ -108,20 +122,29 @@ public class RoutingService extends AbstractLifecycleComponent<RoutingService> i
             if (nextDelaySetting > 0 && nextDelaySetting < registeredNextDelaySetting) {
                 FutureUtils.cancel(registeredNextDelayFuture);
                 registeredNextDelaySetting = nextDelaySetting;
-                TimeValue nextDelay = TimeValue.timeValueMillis(UnassignedInfo.findNextDelayedAllocationIn(settings, event.state()));
-                logger.info("delaying allocation for [{}] unassigned shards, next check in [{}]", UnassignedInfo.getNumberOfDelayedUnassigned(settings, event.state()), nextDelay);
-                registeredNextDelayFuture = threadPool.schedule(nextDelay, ThreadPool.Names.SAME, new AbstractRunnable() {
-                    @Override
-                    protected void doRun() throws Exception {
-                        registeredNextDelaySetting = Long.MAX_VALUE;
-                        reroute("assign delayed unassigned shards");
-                    }
+                // We use System.currentTimeMillis here because we want the
+                // next delay from the "now" perspective, rather than the
+                // delay from the last time the GatewayAllocator tried to
+                // assign/delay the shard
+                TimeValue nextDelay = TimeValue.timeValueMillis(UnassignedInfo.findNextDelayedAllocationIn(System.currentTimeMillis(), settings, event.state()));
+                int unassignedDelayedShards = UnassignedInfo.getNumberOfDelayedUnassigned(unassignedShardsAllocatedTimestamp, settings, event.state());
+                if (unassignedDelayedShards > 0) {
+                    logger.info("delaying allocation for [{}] unassigned shards, next check in [{}]",
+                            unassignedDelayedShards, nextDelay);
+                    registeredNextDelayFuture = threadPool.schedule(nextDelay, ThreadPool.Names.SAME, new AbstractRunnable() {
+                        @Override
+                        protected void doRun() throws Exception {
+                            registeredNextDelaySetting = Long.MAX_VALUE;
+                            reroute("assign delayed unassigned shards");
+                        }
 
                         @Override
                         public void onFailure(Throwable t) {
                             logger.warn("failed to schedule/execute reroute post unassigned shard", t);
-                }
-            });
+                            registeredNextDelaySetting = Long.MAX_VALUE;
+                        }
+                    });
+                }
             } else {
                 logger.trace("no need to schedule reroute due to delayed unassigned, next_delay_setting [{}], registered [{}]", nextDelaySetting, registeredNextDelaySetting);
             }
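Note (editor): the javadoc above describes the race this timestamp avoids — the GatewayAllocator decides a shard is still delayed while the RoutingService, reading the clock a moment later, concludes the delay has already expired and never schedules the follow-up reroute. A toy, self-contained illustration of that race (not Elasticsearch code; numbers and class name are invented):

    public class DelayRaceSketch {
        public static void main(String[] args) {
            long unassignedAt = System.currentTimeMillis(); // when the shard became unassigned
            long delayTimeoutMillis = 100;                  // configured node_left delay

            // GatewayAllocator run: its snapshot falls just before the delay expires -> shard is kept delayed
            long allocatorSnapshot = unassignedAt + 99;
            boolean allocatorDelays = (allocatorSnapshot - unassignedAt) < delayTimeoutMillis;

            // RoutingService reading its own clock slightly later would think the delay already passed
            long routingServiceNow = unassignedAt + 101;
            boolean routingServiceDelays = (routingServiceNow - unassignedAt) < delayTimeoutMillis;

            // With two independent clocks the two components disagree: true vs false,
            // so nobody ends up scheduling the reroute for the delayed shard.
            System.out.println(allocatorDelays + " vs " + routingServiceDelays);
            // Sharing the allocator's snapshot via setUnassignedShardsAllocatedTimestamp keeps both views consistent.
        }
    }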
@@ -199,12 +199,12 @@ public class UnassignedInfo implements ToXContent, Writeable<UnassignedInfo> {
     /**
      * The time in millisecond until this unassigned shard can be reassigned.
      */
-    public long getDelayAllocationExpirationIn(Settings settings, Settings indexSettings) {
+    public long getDelayAllocationExpirationIn(long unassignedShardsAllocatedTimestamp, Settings settings, Settings indexSettings) {
         long delayTimeout = getAllocationDelayTimeoutSetting(settings, indexSettings);
         if (delayTimeout == 0) {
             return 0;
         }
-        long delta = System.currentTimeMillis() - timestamp;
+        long delta = unassignedShardsAllocatedTimestamp - timestamp;
         // account for time drift, treat it as no timeout
         if (delta < 0) {
             return 0;
@@ -216,12 +216,12 @@ public class UnassignedInfo implements ToXContent, Writeable<UnassignedInfo> {
     /**
      * Returns the number of shards that are unassigned and currently being delayed.
      */
-    public static int getNumberOfDelayedUnassigned(Settings settings, ClusterState state) {
+    public static int getNumberOfDelayedUnassigned(long unassignedShardsAllocatedTimestamp, Settings settings, ClusterState state) {
         int count = 0;
         for (ShardRouting shard : state.routingTable().shardsWithState(ShardRoutingState.UNASSIGNED)) {
             if (shard.primary() == false) {
                 IndexMetaData indexMetaData = state.metaData().index(shard.getIndex());
-                long delay = shard.unassignedInfo().getDelayAllocationExpirationIn(settings, indexMetaData.getSettings());
+                long delay = shard.unassignedInfo().getDelayAllocationExpirationIn(unassignedShardsAllocatedTimestamp, settings, indexMetaData.getSettings());
                 if (delay > 0) {
                     count++;
                 }
@@ -251,12 +251,12 @@ public class UnassignedInfo implements ToXContent, Writeable<UnassignedInfo> {
     /**
      * Finds the next (closest) delay expiration of an unassigned shard. Returns 0 if there are none.
      */
-    public static long findNextDelayedAllocationIn(Settings settings, ClusterState state) {
+    public static long findNextDelayedAllocationIn(long unassignedShardsAllocatedTimestamp, Settings settings, ClusterState state) {
         long nextDelay = Long.MAX_VALUE;
         for (ShardRouting shard : state.routingTable().shardsWithState(ShardRoutingState.UNASSIGNED)) {
             if (shard.primary() == false) {
                 IndexMetaData indexMetaData = state.metaData().index(shard.getIndex());
-                long nextShardDelay = shard.unassignedInfo().getDelayAllocationExpirationIn(settings, indexMetaData.getSettings());
+                long nextShardDelay = shard.unassignedInfo().getDelayAllocationExpirationIn(unassignedShardsAllocatedTimestamp, settings, indexMetaData.getSettings());
                 if (nextShardDelay > 0 && nextShardDelay < nextDelay) {
                     nextDelay = nextShardDelay;
                 }
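Note (editor): getDelayAllocationExpirationIn now measures elapsed delay against the caller-supplied snapshot instead of re-reading the clock. A small worked example of that arithmetic with made-up numbers; the final "timeout minus elapsed" step is outside the excerpt above, so it is an assumption here, as is the example class name:

    public class DelayExpirationExample {
        public static void main(String[] args) {
            long delayTimeout = 60_000L;                          // index.unassigned.node_left.delayed_timeout, in ms
            long timestamp = 1_000_000L;                          // when the shard became unassigned
            long unassignedShardsAllocatedTimestamp = 1_045_000L; // snapshot passed in by the caller

            long delta = unassignedShardsAllocatedTimestamp - timestamp; // 45_000 ms already elapsed
            // delta < 0 would mean the snapshot predates the unassigned event (clock drift): treated as no timeout
            long remaining = delayTimeout - delta;                // assumed remainder: 15_000 ms until reassignment
            System.out.println(remaining);
        }
    }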
@@ -128,9 +128,7 @@ public class ScriptScoreFunction extends ScoreFunction {
 
     @Override
     public boolean needsScores() {
-        // Scripts might use _score so we return true here
-        // TODO: Make scripts able to tell us whether they use scores
-        return true;
+        return script.needsScores();
     }
 
     @Override
@@ -113,6 +113,10 @@ public class GatewayAllocator extends AbstractComponent {
     }
 
     public boolean allocateUnassigned(final RoutingAllocation allocation) {
+        // Take a snapshot of the current time and tell the RoutingService
+        // about it, so it will use a consistent timestamp for delays
+        long lastAllocateUnassignedRun = System.currentTimeMillis();
+        this.routingService.setUnassignedShardsAllocatedTimestamp(lastAllocateUnassignedRun);
         boolean changed = false;
 
         RoutingNodes.UnassignedShards unassigned = allocation.routingNodes().unassigned();
@@ -127,7 +131,7 @@ public class GatewayAllocator extends AbstractComponent {
 
         changed |= primaryShardAllocator.allocateUnassigned(allocation);
         changed |= replicaShardAllocator.processExistingRecoveries(allocation);
-        changed |= replicaShardAllocator.allocateUnassigned(allocation);
+        changed |= replicaShardAllocator.allocateUnassigned(allocation, lastAllocateUnassignedRun);
         return changed;
     }
 
@@ -111,6 +111,10 @@ public abstract class ReplicaShardAllocator extends AbstractComponent {
     }
 
     public boolean allocateUnassigned(RoutingAllocation allocation) {
+        return allocateUnassigned(allocation, System.currentTimeMillis());
+    }
+
+    public boolean allocateUnassigned(RoutingAllocation allocation, long allocateUnassignedTimestapm) {
         boolean changed = false;
         final RoutingNodes routingNodes = allocation.routingNodes();
         final RoutingNodes.UnassignedShards.UnassignedIterator unassignedIterator = routingNodes.unassigned().iterator();
@@ -174,7 +178,7 @@ public abstract class ReplicaShardAllocator extends AbstractComponent {
                 // will anyhow wait to find an existing copy of the shard to be allocated
                 // note: the other side of the equation is scheduling a reroute in a timely manner, which happens in the RoutingService
                 IndexMetaData indexMetaData = allocation.metaData().index(shard.getIndex());
-                long delay = shard.unassignedInfo().getDelayAllocationExpirationIn(settings, indexMetaData.getSettings());
+                long delay = shard.unassignedInfo().getDelayAllocationExpirationIn(allocateUnassignedTimestapm, settings, indexMetaData.getSettings());
                 if (delay > 0) {
                     logger.debug("[{}][{}]: delaying allocation of [{}] for [{}]", shard.index(), shard.id(), shard, TimeValue.timeValueMillis(delay));
                     /**
@@ -228,8 +228,6 @@ public final class ShardGetService extends AbstractIndexShardComponent {
                 if (source.ttl > 0) {
                     value = docMapper.TTLFieldMapper().valueForSearch(source.timestamp + source.ttl);
                 }
-            } else if (field.equals(SizeFieldMapper.NAME) && docMapper.rootMapper(SizeFieldMapper.class).fieldType().stored()) {
-                value = source.source.length();
             } else {
                 if (searchLookup == null) {
                     searchLookup = new SearchLookup(mapperService, null, new String[]{type});
@@ -48,7 +48,6 @@ import org.elasticsearch.index.mapper.internal.IdFieldMapper;
 import org.elasticsearch.index.mapper.internal.IndexFieldMapper;
 import org.elasticsearch.index.mapper.internal.ParentFieldMapper;
 import org.elasticsearch.index.mapper.internal.RoutingFieldMapper;
-import org.elasticsearch.index.mapper.internal.SizeFieldMapper;
 import org.elasticsearch.index.mapper.internal.SourceFieldMapper;
 import org.elasticsearch.index.mapper.internal.TTLFieldMapper;
 import org.elasticsearch.index.mapper.internal.TimestampFieldMapper;
@@ -106,7 +105,6 @@ public class DocumentMapper implements ToXContent {
         this.rootMappers.put(IdFieldMapper.class, new IdFieldMapper(indexSettings, mapperService.fullName(IdFieldMapper.NAME)));
         this.rootMappers.put(RoutingFieldMapper.class, new RoutingFieldMapper(indexSettings, mapperService.fullName(RoutingFieldMapper.NAME)));
         // add default mappers, order is important (for example analyzer should come before the rest to set context.analyzer)
-        this.rootMappers.put(SizeFieldMapper.class, new SizeFieldMapper(indexSettings, mapperService.fullName(SizeFieldMapper.NAME)));
         this.rootMappers.put(IndexFieldMapper.class, new IndexFieldMapper(indexSettings, mapperService.fullName(IndexFieldMapper.NAME)));
         this.rootMappers.put(SourceFieldMapper.class, new SourceFieldMapper(indexSettings));
         this.rootMappers.put(TypeFieldMapper.class, new TypeFieldMapper(indexSettings, mapperService.fullName(TypeFieldMapper.NAME)));
@@ -283,10 +281,6 @@ public class DocumentMapper implements ToXContent {
         return rootMapper(ParentFieldMapper.class);
     }
 
-    public SizeFieldMapper sizeFieldMapper() {
-        return rootMapper(SizeFieldMapper.class);
-    }
-
     public TimestampFieldMapper timestampFieldMapper() {
         return rootMapper(TimestampFieldMapper.class);
     }
@@ -299,10 +293,6 @@ public class DocumentMapper implements ToXContent {
         return rootMapper(IndexFieldMapper.class);
     }
 
-    public SizeFieldMapper SizeFieldMapper() {
-        return rootMapper(SizeFieldMapper.class);
-    }
-
     public Query typeFilter() {
         return typeMapper().fieldType().termQuery(type, null);
     }
@@ -20,7 +20,9 @@
 package org.elasticsearch.index.mapper;
 
 import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.ImmutableSortedMap;
 import com.google.common.collect.Maps;
 
 import org.elasticsearch.Version;
 import org.elasticsearch.common.Nullable;
 import org.elasticsearch.common.ParseFieldMatcher;
@@ -35,8 +37,6 @@ import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.common.xcontent.XContentFactory;
 import org.elasticsearch.common.xcontent.XContentHelper;
 import org.elasticsearch.common.xcontent.XContentParser;
-import org.elasticsearch.index.AbstractIndexComponent;
-import org.elasticsearch.index.Index;
 import org.elasticsearch.index.analysis.AnalysisService;
 import org.elasticsearch.index.mapper.core.*;
 import org.elasticsearch.index.mapper.geo.GeoPointFieldMapper;
@@ -50,6 +50,7 @@ import org.elasticsearch.index.similarity.SimilarityLookupService;
 import org.elasticsearch.script.Script;
 import org.elasticsearch.script.ScriptService;
 
+import java.util.Collections;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
@@ -73,6 +74,7 @@ public class DocumentMapperParser {
 
     private volatile ImmutableMap<String, Mapper.TypeParser> typeParsers;
     private volatile ImmutableMap<String, Mapper.TypeParser> rootTypeParsers;
+    private volatile ImmutableMap<String, Mapper.TypeParser> additionalRootMappers;
 
     public DocumentMapperParser(@IndexSettings Settings indexSettings, MapperService mapperService, AnalysisService analysisService,
                                 SimilarityLookupService similarityLookupService, ScriptService scriptService) {
@@ -109,7 +111,6 @@ public class DocumentMapperParser {
         typeParsers = typeParsersBuilder.immutableMap();
 
         rootTypeParsers = new MapBuilder<String, Mapper.TypeParser>()
-                .put(SizeFieldMapper.NAME, new SizeFieldMapper.TypeParser())
                 .put(IndexFieldMapper.NAME, new IndexFieldMapper.TypeParser())
                 .put(SourceFieldMapper.NAME, new SourceFieldMapper.TypeParser())
                 .put(TypeFieldMapper.NAME, new TypeFieldMapper.TypeParser())
@@ -123,6 +124,7 @@ public class DocumentMapperParser {
                 .put(IdFieldMapper.NAME, new IdFieldMapper.TypeParser())
                 .put(FieldNamesFieldMapper.NAME, new FieldNamesFieldMapper.TypeParser())
                 .immutableMap();
+        additionalRootMappers = ImmutableSortedMap.<String, Mapper.TypeParser>of();
         indexVersionCreated = Version.indexCreated(indexSettings);
     }
 
@@ -139,6 +141,10 @@ public class DocumentMapperParser {
             rootTypeParsers = new MapBuilder<>(rootTypeParsers)
                     .put(type, typeParser)
                     .immutableMap();
+            additionalRootMappers = ImmutableSortedMap.<String, Mapper.TypeParser>naturalOrder()
+                    .putAll(additionalRootMappers)
+                    .put(type, typeParser)
+                    .build();
         }
     }
 
@@ -204,6 +210,10 @@ public class DocumentMapperParser {
         Mapper.TypeParser.ParserContext parserContext = parserContext();
         // parse RootObjectMapper
         DocumentMapper.Builder docBuilder = doc(indexSettings, (RootObjectMapper.Builder) rootObjectTypeParser.parse(type, mapping, parserContext), mapperService);
+        // Add default mapping for the plugged-in meta mappers
+        for (Map.Entry<String, Mapper.TypeParser> entry : additionalRootMappers.entrySet()) {
+            docBuilder.put((MetadataFieldMapper.Builder<?, ?>) entry.getValue().parse(entry.getKey(), Collections.<String, Object>emptyMap(), parserContext));
+        }
         Iterator<Map.Entry<String, Object>> iterator = mapping.entrySet().iterator();
         // parse DocumentMapper
         while(iterator.hasNext()) {
@@ -182,7 +182,9 @@ public class QueryParseContext {
     }
 
     public void addNamedQuery(String name, Query query) {
-        namedQueries.put(name, query);
+        if (query != null) {
+            namedQueries.put(name, query);
+        }
     }
 
     public ImmutableMap<String, Query> copyNamedQueries() {
@@ -85,7 +85,7 @@ public class MultiMatchQuery extends MatchQuery {
             throw new IllegalStateException("No such type: " + type);
         }
         final List<? extends Query> queries = queryBuilder.buildGroupedQueries(type, fieldNames, value, minimumShouldMatch);
-        return queryBuilder.conbineGrouped(queries);
+        return queryBuilder.combineGrouped(queries);
     }
 
     private QueryBuilder queryBuilder;
@@ -119,7 +119,7 @@ public class MultiMatchQuery extends MatchQuery {
         return parseAndApply(type, field, value, minimumShouldMatch, boostValue);
     }
 
-    public Query conbineGrouped(List<? extends Query> groupQuery) {
+    public Query combineGrouped(List<? extends Query> groupQuery) {
         if (groupQuery == null || groupQuery.isEmpty()) {
             return null;
         }
@@ -196,7 +196,7 @@ public class MultiMatchQuery extends MatchQuery {
                 blendedFields = null;
             }
             final FieldAndFieldType fieldAndFieldType = group.get(0);
-            Query q = parseGroup(type.matchQueryType(), fieldAndFieldType.field, fieldAndFieldType.boost, value, minimumShouldMatch);
+            Query q = parseGroup(type.matchQueryType(), fieldAndFieldType.field, 1f, value, minimumShouldMatch);
             if (q != null) {
                 queries.add(q);
             }
@@ -86,6 +86,10 @@ public class NativeScriptEngineService extends AbstractComponent implements Scri
                 script.setLookup(lookup.getLeafSearchLookup(context));
                 return script;
             }
+            @Override
+            public boolean needsScores() {
+                return scriptFactory.needsScores();
+            }
         };
     }
 
@@ -41,4 +41,11 @@ public interface NativeScriptFactory {
      * @param params The parameters passed to the script. Can be <tt>null</tt>.
      */
     ExecutableScript newScript(@Nullable Map<String, Object> params);
+
+    /**
+     * Indicates if document scores may be needed by the produced scripts.
+     *
+     * @return {@code true} if scores are needed.
+     */
+    boolean needsScores();
 }
@@ -29,4 +29,11 @@ public interface SearchScript {
 
     LeafSearchScript getLeafSearchScript(LeafReaderContext context) throws IOException;
 
+    /**
+     * Indicates if document scores may be needed by this {@link SearchScript}.
+     *
+     * @return {@code true} if scores are needed.
+     */
+    boolean needsScores();
+
 }
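Note (editor): the new needsScores() hook lets a script declare up front whether it reads _score, so callers such as ScriptScoreFunction and the aggregation ValuesSource implementations (updated below) can avoid asking Lucene for scores when they are not needed. A minimal sketch of a native script factory using it; the class and behaviour here are invented for illustration, only the overridden methods come from this commit:

    import java.util.Map;

    import org.elasticsearch.common.Nullable;
    import org.elasticsearch.script.AbstractSearchScript;
    import org.elasticsearch.script.ExecutableScript;
    import org.elasticsearch.script.NativeScriptFactory;

    // A hypothetical native script whose result does not depend on relevance scores.
    public class ConstantValueScriptFactory implements NativeScriptFactory {

        @Override
        public ExecutableScript newScript(@Nullable Map<String, Object> params) {
            return new ConstantValueScript();
        }

        @Override
        public boolean needsScores() {
            // Returning false lets NativeScriptEngineService report that the
            // produced SearchScripts never read _score.
            return false;
        }

        static class ConstantValueScript extends AbstractSearchScript {
            @Override
            public Object run() {
                return 1.0; // score-independent value
            }
        }
    }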
@@ -112,7 +112,6 @@ public class ExpressionScriptEngineService extends AbstractComponent implements
             for (String variable : expr.variables) {
                 if (variable.equals("_score")) {
                     bindings.add(new SortField("_score", SortField.Type.SCORE));
-
                 } else if (variable.equals("_value")) {
                     specialValue = new ReplaceableConstValueSource();
                     bindings.add("_value", specialValue);
@@ -173,7 +172,8 @@ public class ExpressionScriptEngineService extends AbstractComponent implements
                 }
             }
 
-            return new ExpressionSearchScript(compiledScript, bindings, specialValue);
+            final boolean needsScores = expr.getSortField(bindings, false).needsScores();
+            return new ExpressionSearchScript(compiledScript, bindings, specialValue, needsScores);
         } catch (Exception exception) {
             throw new ScriptException("Error during search with " + compiledScript, exception);
         }
@@ -46,14 +46,21 @@ class ExpressionSearchScript implements SearchScript {
     final SimpleBindings bindings;
     final ValueSource source;
     final ReplaceableConstValueSource specialValue; // _value
+    final boolean needsScores;
     Scorer scorer;
     int docid;
 
-    ExpressionSearchScript(CompiledScript c, SimpleBindings b, ReplaceableConstValueSource v) {
+    ExpressionSearchScript(CompiledScript c, SimpleBindings b, ReplaceableConstValueSource v, boolean needsScores) {
         compiledScript = c;
         bindings = b;
         source = ((Expression)compiledScript.compiled()).getValueSource(bindings);
         specialValue = v;
+        this.needsScores = needsScores;
+    }
+
+    @Override
+    public boolean needsScores() {
+        return needsScores;
     }
 
     @Override
@@ -168,6 +168,12 @@ public class GroovyScriptEngineService extends AbstractComponent implements Scri
                 }
                 return new GroovyScript(compiledScript, scriptObject, leafLookup, logger);
             }
+
+            @Override
+            public boolean needsScores() {
+                // TODO: can we reliably know if a groovy script makes use of _score
+                return true;
+            }
         };
     }
 
@@ -216,8 +216,7 @@ public abstract class ValuesSource {
 
         @Override
         public boolean needsScores() {
-            // TODO: add a way to know whether scripts are using scores
-            return true;
+            return script.needsScores();
         }
     }
 
@@ -295,8 +294,7 @@ public abstract class ValuesSource {
 
         @Override
         public boolean needsScores() {
-            // TODO: add a way to know whether scripts are using scores
-            return true;
+            return script.needsScores();
         }
 
         @Override
@@ -431,8 +429,7 @@ public abstract class ValuesSource {
 
         @Override
         public boolean needsScores() {
-            // TODO: add a way to know whether scripts are using scores
-            return true;
+            return script.needsScores();
         }
     }
 
@@ -451,8 +448,7 @@ public abstract class ValuesSource {
 
         @Override
         public boolean needsScores() {
-            // TODO: add a way to know whether scripts are using scores
-            return true;
+            return script.needsScores();
        }
 
         @Override
@@ -34,6 +34,11 @@ public class NativeScript1 extends AbstractSearchScript {
         public ExecutableScript newScript(@Nullable Map<String, Object> params) {
             return new NativeScript1();
         }
+
+        @Override
+        public boolean needsScores() {
+            return false;
+        }
     }
 
     public static final String NATIVE_SCRIPT_1 = "native_1";
@@ -34,6 +34,11 @@ public class NativeScript2 extends AbstractSearchScript {
         public ExecutableScript newScript(@Nullable Map<String, Object> params) {
            return new NativeScript2();
         }
+
+        @Override
+        public boolean needsScores() {
+            return false;
+        }
     }
 
     public static final String NATIVE_SCRIPT_2 = "native_2";
@@ -34,6 +34,11 @@ public class NativeScript3 extends AbstractSearchScript {
         public ExecutableScript newScript(@Nullable Map<String, Object> params) {
             return new NativeScript3();
         }
+
+        @Override
+        public boolean needsScores() {
+            return false;
+        }
     }
 
     public static final String NATIVE_SCRIPT_3 = "native_3";
@@ -34,6 +34,11 @@ public class NativeScript4 extends AbstractSearchScript {
         public ExecutableScript newScript(@Nullable Map<String, Object> params) {
             return new NativeScript4();
         }
+
+        @Override
+        public boolean needsScores() {
+            return false;
+        }
     }
 
     public static final String NATIVE_SCRIPT_4 = "native_4";
@@ -36,6 +36,11 @@ public class NativeConstantForLoopScoreScript extends AbstractSearchScript {
         public ExecutableScript newScript(@Nullable Map<String, Object> params) {
             return new NativeConstantForLoopScoreScript(params);
         }
+
+        @Override
+        public boolean needsScores() {
+            return false;
+        }
     }
 
     private NativeConstantForLoopScoreScript(Map<String, Object> params) {
@@ -36,6 +36,11 @@ public class NativeConstantScoreScript extends AbstractSearchScript {
         public ExecutableScript newScript(@Nullable Map<String, Object> params) {
             return new NativeConstantScoreScript();
         }
+
+        @Override
+        public boolean needsScores() {
+            return false;
+        }
     }
 
     private NativeConstantScoreScript() {
@@ -42,6 +42,11 @@ public class NativeNaiveTFIDFScoreScript extends AbstractSearchScript {
         public ExecutableScript newScript(@Nullable Map<String, Object> params) {
             return new NativeNaiveTFIDFScoreScript(params);
         }
+
+        @Override
+        public boolean needsScores() {
+            return false;
+        }
     }
 
     private NativeNaiveTFIDFScoreScript(Map<String, Object> params) {
@@ -44,6 +44,11 @@ public class NativePayloadSumNoRecordScoreScript extends AbstractSearchScript {
         public ExecutableScript newScript(@Nullable Map<String, Object> params) {
             return new NativePayloadSumNoRecordScoreScript(params);
         }
+
+        @Override
+        public boolean needsScores() {
+            return false;
+        }
     }
 
     private NativePayloadSumNoRecordScoreScript(Map<String, Object> params) {
@@ -44,6 +44,11 @@ public class NativePayloadSumScoreScript extends AbstractSearchScript {
        public ExecutableScript newScript(@Nullable Map<String, Object> params) {
             return new NativePayloadSumScoreScript(params);
         }
+
+        @Override
+        public boolean needsScores() {
+            return false;
+        }
     }
 
     private NativePayloadSumScoreScript(Map<String, Object> params) {
@@ -34,15 +34,18 @@ import org.elasticsearch.cluster.routing.allocation.command.MoveAllocationComman
 import org.elasticsearch.cluster.routing.allocation.decider.Decision;
 import org.elasticsearch.cluster.routing.allocation.decider.DisableAllocationDecider;
 import org.elasticsearch.cluster.routing.allocation.decider.EnableAllocationDecider.Allocation;
+import org.elasticsearch.cluster.routing.allocation.decider.ThrottlingAllocationDecider;
 import org.elasticsearch.common.Priority;
 import org.elasticsearch.common.io.FileSystemUtils;
 import org.elasticsearch.common.logging.ESLogger;
 import org.elasticsearch.common.logging.Loggers;
 import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.common.unit.TimeValue;
 import org.elasticsearch.env.NodeEnvironment;
 import org.elasticsearch.index.shard.ShardId;
 import org.elasticsearch.test.ESIntegTestCase;
 import org.elasticsearch.test.ESIntegTestCase.ClusterScope;
+import org.elasticsearch.test.InternalTestCluster;
 import org.junit.Test;
 
 import java.nio.file.Path;
@@ -160,6 +163,40 @@ public class ClusterRerouteIT extends ESIntegTestCase {
         rerouteWithAllocateLocalGateway(commonSettings);
     }
 
+    @Test
+    public void testDelayWithALargeAmountOfShards() throws Exception {
+        Settings commonSettings = settingsBuilder()
+                .put("gateway.type", "local")
+                .put(ThrottlingAllocationDecider.CLUSTER_ROUTING_ALLOCATION_CONCURRENT_RECOVERIES, 1)
+                .build();
+        logger.info("--> starting 4 nodes");
+        String node_1 = internalCluster().startNode(commonSettings);
+        internalCluster().startNode(commonSettings);
+        internalCluster().startNode(commonSettings);
+        internalCluster().startNode(commonSettings);
+
+        assertThat(cluster().size(), equalTo(4));
+        ClusterHealthResponse healthResponse = client().admin().cluster().prepareHealth().setWaitForNodes("4").execute().actionGet();
+        assertThat(healthResponse.isTimedOut(), equalTo(false));
+
+        logger.info("--> create indices");
+        for (int i = 0; i < 25; i++) {
+            client().admin().indices().prepareCreate("test" + i)
+                    .setSettings(settingsBuilder()
+                            .put("index.number_of_shards", 5).put("index.number_of_replicas", 1)
+                            .put("index.unassigned.node_left.delayed_timeout", randomIntBetween(250, 1000) + "ms"))
+                    .execute().actionGet();
+        }
+
+        ensureGreen(TimeValue.timeValueMinutes(1));
+
+        logger.info("--> stopping node1");
+        internalCluster().stopRandomNode(InternalTestCluster.nameFilter(node_1));
+
+        // This might run slowly on older hardware
+        ensureGreen(TimeValue.timeValueMinutes(2));
+    }
+
     private void rerouteWithAllocateLocalGateway(Settings commonSettings) throws Exception {
         logger.info("--> starting 2 nodes");
         String node_1 = internalCluster().startNode(commonSettings);
@@ -28,6 +28,7 @@ import org.elasticsearch.cluster.metadata.MetaData;
 import org.elasticsearch.cluster.node.DiscoveryNodes;
 import org.elasticsearch.cluster.routing.allocation.AllocationService;
 import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.common.unit.TimeValue;
 import org.elasticsearch.test.ESAllocationTestCase;
 import org.elasticsearch.threadpool.ThreadPool;
 import org.junit.After;
@@ -112,6 +113,10 @@ public class RoutingServiceTests extends ESAllocationTestCase {
         ClusterState prevState = clusterState;
         clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder(clusterState.nodes()).remove("node2")).build();
         clusterState = ClusterState.builder(clusterState).routingResult(allocation.reroute(clusterState)).build();
+        // We need to update the routing service's last attempted run to
+        // signal that the GatewayAllocator tried to allocated it but
+        // it was delayed
+        routingService.setUnassignedShardsAllocatedTimestamp(System.currentTimeMillis());
         ClusterState newState = clusterState;
 
         routingService.clusterChanged(new ClusterChangedEvent("test", newState, prevState));
@@ -125,6 +130,44 @@ public class RoutingServiceTests extends ESAllocationTestCase {
         assertThat(routingService.getRegisteredNextDelaySetting(), equalTo(Long.MAX_VALUE));
     }
 
+    @Test
+    public void testDelayedUnassignedDoesNotRerouteForNegativeDelays() throws Exception {
+        AllocationService allocation = createAllocationService();
+        MetaData metaData = MetaData.builder()
+                .put(IndexMetaData.builder("test").settings(settings(Version.CURRENT).put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING, "100ms"))
+                        .numberOfShards(1).numberOfReplicas(1))
+                .build();
+        ClusterState clusterState = ClusterState.builder(ClusterName.DEFAULT)
+                .metaData(metaData)
+                .routingTable(RoutingTable.builder().addAsNew(metaData.index("test"))).build();
+        clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder().put(newNode("node1")).put(newNode("node2")).localNodeId("node1").masterNodeId("node1")).build();
+        clusterState = ClusterState.builder(clusterState).routingResult(allocation.reroute(clusterState)).build();
+        // starting primaries
+        clusterState = ClusterState.builder(clusterState).routingResult(allocation.applyStartedShards(clusterState, clusterState.routingNodes().shardsWithState(INITIALIZING))).build();
+        // starting replicas
+        clusterState = ClusterState.builder(clusterState).routingResult(allocation.applyStartedShards(clusterState, clusterState.routingNodes().shardsWithState(INITIALIZING))).build();
+        assertThat(clusterState.routingNodes().hasUnassigned(), equalTo(false));
+        // remove node2 and reroute
+        ClusterState prevState = clusterState;
+        clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder(clusterState.nodes()).remove("node2")).build();
+        clusterState = ClusterState.builder(clusterState).routingResult(allocation.reroute(clusterState)).build();
+        // Set it in the future so the delay will be negative
+        routingService.setUnassignedShardsAllocatedTimestamp(System.currentTimeMillis() + TimeValue.timeValueMinutes(1).millis());
+
+        ClusterState newState = clusterState;
+
+        routingService.clusterChanged(new ClusterChangedEvent("test", newState, prevState));
+        assertBusy(new Runnable() {
+            @Override
+            public void run() {
+                assertThat(routingService.hasReroutedAndClear(), equalTo(false));
+
+                // verify the registration has been updated
+                assertThat(routingService.getRegisteredNextDelaySetting(), equalTo(100L));
+            }
+        });
+    }
+
     private class TestRoutingService extends RoutingService {
 
         private AtomicBoolean rerouted = new AtomicBoolean();
@ -273,7 +273,8 @@ public class UnassignedInfoTests extends ESAllocationTestCase {
|
||||||
assertBusy(new Runnable() {
|
assertBusy(new Runnable() {
|
||||||
@Override
|
@Override
|
||||||
public void run() {
|
public void run() {
|
||||||
long delay = unassignedInfo.getDelayAllocationExpirationIn(Settings.builder().put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING, "10h").build(), Settings.EMPTY);
|
long delay = unassignedInfo.getDelayAllocationExpirationIn(System.currentTimeMillis(),
|
||||||
|
Settings.builder().put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING, "10h").build(), Settings.EMPTY);
|
||||||
assertThat(delay, greaterThan(0l));
|
assertThat(delay, greaterThan(0l));
|
||||||
assertThat(delay, lessThan(TimeValue.timeValueHours(10).millis()));
|
assertThat(delay, lessThan(TimeValue.timeValueHours(10).millis()));
|
||||||
}
|
}
|
||||||
|
@ -290,7 +291,8 @@ public class UnassignedInfoTests extends ESAllocationTestCase {
|
||||||
UnassignedInfo unassignedInfo = new UnassignedInfo(RandomPicks.randomFrom(getRandom(), reasons), null);
|
UnassignedInfo unassignedInfo = new UnassignedInfo(RandomPicks.randomFrom(getRandom(), reasons), null);
|
||||||
long delay = unassignedInfo.getAllocationDelayTimeoutSetting(Settings.builder().put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING, "10h").build(), Settings.EMPTY);
|
long delay = unassignedInfo.getAllocationDelayTimeoutSetting(Settings.builder().put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING, "10h").build(), Settings.EMPTY);
|
||||||
assertThat(delay, equalTo(0l));
|
assertThat(delay, equalTo(0l));
|
||||||
delay = unassignedInfo.getDelayAllocationExpirationIn(Settings.builder().put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING, "10h").build(), Settings.EMPTY);
|
delay = unassignedInfo.getDelayAllocationExpirationIn(System.currentTimeMillis(),
|
||||||
|
Settings.builder().put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING, "10h").build(), Settings.EMPTY);
|
||||||
assertThat(delay, equalTo(0l));
|
assertThat(delay, equalTo(0l));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -306,7 +308,8 @@ public class UnassignedInfoTests extends ESAllocationTestCase {
|
||||||
.routingTable(RoutingTable.builder().addAsNew(metaData.index("test1")).addAsNew(metaData.index("test2"))).build();
|
.routingTable(RoutingTable.builder().addAsNew(metaData.index("test1")).addAsNew(metaData.index("test2"))).build();
|
||||||
clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder().put(newNode("node1")).put(newNode("node2"))).build();
|
clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder().put(newNode("node1")).put(newNode("node2"))).build();
|
||||||
clusterState = ClusterState.builder(clusterState).routingResult(allocation.reroute(clusterState)).build();
|
clusterState = ClusterState.builder(clusterState).routingResult(allocation.reroute(clusterState)).build();
|
||||||
assertThat(UnassignedInfo.getNumberOfDelayedUnassigned(Settings.builder().put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING, "10h").build(), clusterState), equalTo(0));
|
assertThat(UnassignedInfo.getNumberOfDelayedUnassigned(System.currentTimeMillis(),
|
||||||
|
Settings.builder().put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING, "10h").build(), clusterState), equalTo(0));
|
||||||
// starting primaries
|
// starting primaries
|
||||||
clusterState = ClusterState.builder(clusterState).routingResult(allocation.applyStartedShards(clusterState, clusterState.routingNodes().shardsWithState(INITIALIZING))).build();
|
clusterState = ClusterState.builder(clusterState).routingResult(allocation.applyStartedShards(clusterState, clusterState.routingNodes().shardsWithState(INITIALIZING))).build();
|
||||||
// starting replicas
|
// starting replicas
|
||||||
|
@ -315,7 +318,8 @@ public class UnassignedInfoTests extends ESAllocationTestCase {
|
||||||
// remove node2 and reroute
|
// remove node2 and reroute
|
||||||
clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder(clusterState.nodes()).remove("node2")).build();
|
clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder(clusterState.nodes()).remove("node2")).build();
|
||||||
clusterState = ClusterState.builder(clusterState).routingResult(allocation.reroute(clusterState)).build();
|
clusterState = ClusterState.builder(clusterState).routingResult(allocation.reroute(clusterState)).build();
|
||||||
assertThat(clusterState.prettyPrint(), UnassignedInfo.getNumberOfDelayedUnassigned(Settings.builder().put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING, "10h").build(), clusterState), equalTo(2));
|
assertThat(clusterState.prettyPrint(), UnassignedInfo.getNumberOfDelayedUnassigned(System.currentTimeMillis(),
|
||||||
|
Settings.builder().put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING, "10h").build(), clusterState), equalTo(2));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
@ -330,7 +334,8 @@ public class UnassignedInfoTests extends ESAllocationTestCase {
|
||||||
.routingTable(RoutingTable.builder().addAsNew(metaData.index("test1")).addAsNew(metaData.index("test2"))).build();
|
.routingTable(RoutingTable.builder().addAsNew(metaData.index("test1")).addAsNew(metaData.index("test2"))).build();
|
||||||
clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder().put(newNode("node1")).put(newNode("node2"))).build();
|
clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder().put(newNode("node1")).put(newNode("node2"))).build();
|
||||||
clusterState = ClusterState.builder(clusterState).routingResult(allocation.reroute(clusterState)).build();
|
clusterState = ClusterState.builder(clusterState).routingResult(allocation.reroute(clusterState)).build();
|
||||||
assertThat(UnassignedInfo.getNumberOfDelayedUnassigned(Settings.builder().put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING, "10h").build(), clusterState), equalTo(0));
|
assertThat(UnassignedInfo.getNumberOfDelayedUnassigned(System.currentTimeMillis(),
|
||||||
|
Settings.builder().put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING, "10h").build(), clusterState), equalTo(0));
|
||||||
// starting primaries
|
// starting primaries
|
||||||
clusterState = ClusterState.builder(clusterState).routingResult(allocation.applyStartedShards(clusterState, clusterState.routingNodes().shardsWithState(INITIALIZING))).build();
|
clusterState = ClusterState.builder(clusterState).routingResult(allocation.applyStartedShards(clusterState, clusterState.routingNodes().shardsWithState(INITIALIZING))).build();
|
||||||
// starting replicas
|
// starting replicas
|
||||||
|
@ -343,7 +348,8 @@ public class UnassignedInfoTests extends ESAllocationTestCase {
|
||||||
long nextDelaySetting = UnassignedInfo.findSmallestDelayedAllocationSetting(Settings.builder().put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING, "10h").build(), clusterState);
|
long nextDelaySetting = UnassignedInfo.findSmallestDelayedAllocationSetting(Settings.builder().put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING, "10h").build(), clusterState);
|
||||||
assertThat(nextDelaySetting, equalTo(TimeValue.timeValueHours(10).millis()));
|
assertThat(nextDelaySetting, equalTo(TimeValue.timeValueHours(10).millis()));
|
||||||
|
|
||||||
long nextDelay = UnassignedInfo.findNextDelayedAllocationIn(Settings.builder().put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING, "10h").build(), clusterState);
|
long nextDelay = UnassignedInfo.findNextDelayedAllocationIn(System.currentTimeMillis(),
|
||||||
|
Settings.builder().put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING, "10h").build(), clusterState);
|
||||||
assertThat(nextDelay, greaterThan(TimeValue.timeValueHours(9).millis()));
|
assertThat(nextDelay, greaterThan(TimeValue.timeValueHours(9).millis()));
|
||||||
assertThat(nextDelay, lessThanOrEqualTo(TimeValue.timeValueHours(10).millis()));
|
assertThat(nextDelay, lessThanOrEqualTo(TimeValue.timeValueHours(10).millis()));
|
||||||
}
|
}
|
||||||
|
|
|
@ -73,5 +73,10 @@ public class ScriptScoreFunctionTests extends ESTestCase {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean needsScores() {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1031,9 +1031,6 @@ public class GetActionIT extends ESIntegTestCase {
|
||||||
" \"doc\": {\n" +
|
" \"doc\": {\n" +
|
||||||
" \"_timestamp\": {\n" +
|
" \"_timestamp\": {\n" +
|
||||||
" \"enabled\": true\n" +
|
" \"enabled\": true\n" +
|
||||||
" },\n" +
|
|
||||||
" \"_size\": {\n" +
|
|
||||||
" \"enabled\": true\n" +
|
|
||||||
" }\n" +
|
" }\n" +
|
||||||
" }\n" +
|
" }\n" +
|
||||||
" }\n" +
|
" }\n" +
|
||||||
|
@ -1045,7 +1042,7 @@ public class GetActionIT extends ESIntegTestCase {
|
||||||
" \"text\": \"some text.\"\n" +
|
" \"text\": \"some text.\"\n" +
|
||||||
"}\n";
|
"}\n";
|
||||||
client().prepareIndex("test", "doc").setId("1").setSource(doc).setRouting("1").get();
|
client().prepareIndex("test", "doc").setId("1").setSource(doc).setRouting("1").get();
|
||||||
String[] fieldsList = {"_timestamp", "_size", "_routing"};
|
String[] fieldsList = {"_timestamp", "_routing"};
|
||||||
// before refresh - document is only in translog
|
// before refresh - document is only in translog
|
||||||
assertGetFieldsAlwaysWorks(indexOrAlias(), "doc", "1", fieldsList, "1");
|
assertGetFieldsAlwaysWorks(indexOrAlias(), "doc", "1", fieldsList, "1");
|
||||||
refresh();
|
refresh();
|
||||||
|
|
|
@ -44,8 +44,8 @@ import org.elasticsearch.index.mapper.MapperParsingException;
|
||||||
import org.elasticsearch.index.mapper.ParseContext.Document;
|
import org.elasticsearch.index.mapper.ParseContext.Document;
|
||||||
import org.elasticsearch.index.mapper.ParsedDocument;
|
import org.elasticsearch.index.mapper.ParsedDocument;
|
||||||
import org.elasticsearch.index.mapper.internal.AllFieldMapper;
|
import org.elasticsearch.index.mapper.internal.AllFieldMapper;
|
||||||
import org.elasticsearch.index.mapper.internal.SizeFieldMapper;
|
|
||||||
import org.elasticsearch.test.ESSingleNodeTestCase;
|
import org.elasticsearch.test.ESSingleNodeTestCase;
|
||||||
|
import org.elasticsearch.index.mapper.internal.TimestampFieldMapper;
|
||||||
import org.hamcrest.Matchers;
|
import org.hamcrest.Matchers;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
||||||
|
@ -387,7 +387,7 @@ public class SimpleAllMapperTests extends ESSingleNodeTestCase {
|
||||||
String mapping = "{";
|
String mapping = "{";
|
||||||
Map<String, String> rootTypes = new HashMap<>();
|
Map<String, String> rootTypes = new HashMap<>();
|
||||||
//just pick some example from DocumentMapperParser.rootTypeParsers
|
//just pick some example from DocumentMapperParser.rootTypeParsers
|
||||||
rootTypes.put(SizeFieldMapper.NAME, "{\"enabled\" : true}");
|
rootTypes.put(TimestampFieldMapper.NAME, "{\"enabled\" : true}");
|
||||||
rootTypes.put("include_in_all", "true");
|
rootTypes.put("include_in_all", "true");
|
||||||
rootTypes.put("dynamic_date_formats", "[\"yyyy-MM-dd\", \"dd-MM-yyyy\"]");
|
rootTypes.put("dynamic_date_formats", "[\"yyyy-MM-dd\", \"dd-MM-yyyy\"]");
|
||||||
rootTypes.put("numeric_detection", "true");
|
rootTypes.put("numeric_detection", "true");
|
||||||
|
|
|
@ -33,16 +33,16 @@ public class ParseMappingTypeLevelTests extends ESSingleNodeTestCase {
|
||||||
@Test
|
@Test
|
||||||
public void testTypeLevel() throws Exception {
|
public void testTypeLevel() throws Exception {
|
||||||
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
|
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
|
||||||
.startObject("_size").field("enabled", true).endObject()
|
.startObject("_timestamp").field("enabled", true).endObject()
|
||||||
.endObject().endObject().string();
|
.endObject().endObject().string();
|
||||||
|
|
||||||
DocumentMapperParser parser = createIndex("test").mapperService().documentMapperParser();
|
DocumentMapperParser parser = createIndex("test").mapperService().documentMapperParser();
|
||||||
DocumentMapper mapper = parser.parse("type", mapping);
|
DocumentMapper mapper = parser.parse("type", mapping);
|
||||||
assertThat(mapper.type(), equalTo("type"));
|
assertThat(mapper.type(), equalTo("type"));
|
||||||
assertThat(mapper.sizeFieldMapper().enabled(), equalTo(true));
|
assertThat(mapper.timestampFieldMapper().enabled(), equalTo(true));
|
||||||
|
|
||||||
mapper = parser.parse(mapping);
|
mapper = parser.parse(mapping);
|
||||||
assertThat(mapper.type(), equalTo("type"));
|
assertThat(mapper.type(), equalTo("type"));
|
||||||
assertThat(mapper.sizeFieldMapper().enabled(), equalTo(true));
|
assertThat(mapper.timestampFieldMapper().enabled(), equalTo(true));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -158,25 +158,6 @@ public class UpdateMappingTests extends ESSingleNodeTestCase {
|
||||||
assertTrue(documentMapper.timestampFieldMapper().fieldType().stored());
|
assertTrue(documentMapper.timestampFieldMapper().fieldType().stored());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testSizeParsing() throws IOException {
|
|
||||||
IndexService indexService = createIndex("test", Settings.settingsBuilder().build());
|
|
||||||
XContentBuilder indexMapping = XContentFactory.jsonBuilder();
|
|
||||||
boolean enabled = randomBoolean();
|
|
||||||
indexMapping.startObject()
|
|
||||||
.startObject("type")
|
|
||||||
.startObject("_size")
|
|
||||||
.field("enabled", enabled)
|
|
||||||
.endObject()
|
|
||||||
.endObject()
|
|
||||||
.endObject();
|
|
||||||
DocumentMapper documentMapper = indexService.mapperService().parse("type", new CompressedXContent(indexMapping.string()), true);
|
|
||||||
assertThat(documentMapper.sizeFieldMapper().enabled(), equalTo(enabled));
|
|
||||||
assertTrue(documentMapper.sizeFieldMapper().fieldType().stored());
|
|
||||||
documentMapper = indexService.mapperService().parse("type", new CompressedXContent(documentMapper.mappingSource().string()), true);
|
|
||||||
assertThat(documentMapper.sizeFieldMapper().enabled(), equalTo(enabled));
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testSizeTimestampIndexParsing() throws IOException {
|
public void testSizeTimestampIndexParsing() throws IOException {
|
||||||
IndexService indexService = createIndex("test", Settings.settingsBuilder().build());
|
IndexService indexService = createIndex("test", Settings.settingsBuilder().build());
|
||||||
|
@ -192,7 +173,7 @@ public class UpdateMappingTests extends ESSingleNodeTestCase {
|
||||||
createIndex("test1", Settings.settingsBuilder().build());
|
createIndex("test1", Settings.settingsBuilder().build());
|
||||||
createIndex("test2", Settings.settingsBuilder().build());
|
createIndex("test2", Settings.settingsBuilder().build());
|
||||||
XContentBuilder defaultMapping = XContentFactory.jsonBuilder().startObject()
|
XContentBuilder defaultMapping = XContentFactory.jsonBuilder().startObject()
|
||||||
.startObject(MapperService.DEFAULT_MAPPING).startObject("_size").field("enabled", true).endObject().endObject()
|
.startObject(MapperService.DEFAULT_MAPPING).startObject("_timestamp").field("enabled", true).endObject().endObject()
|
||||||
.endObject();
|
.endObject();
|
||||||
client().admin().indices().preparePutMapping().setType(MapperService.DEFAULT_MAPPING).setSource(defaultMapping).get();
|
client().admin().indices().preparePutMapping().setType(MapperService.DEFAULT_MAPPING).setSource(defaultMapping).get();
|
||||||
XContentBuilder typeMapping = XContentFactory.jsonBuilder().startObject()
|
XContentBuilder typeMapping = XContentFactory.jsonBuilder().startObject()
|
||||||
|
@ -204,7 +185,7 @@ public class UpdateMappingTests extends ESSingleNodeTestCase {
|
||||||
GetMappingsResponse response = client().admin().indices().prepareGetMappings("test2").get();
|
GetMappingsResponse response = client().admin().indices().prepareGetMappings("test2").get();
|
||||||
assertNotNull(response.getMappings().get("test2").get("type").getSourceAsMap().get("_all"));
|
assertNotNull(response.getMappings().get("test2").get("type").getSourceAsMap().get("_all"));
|
||||||
assertFalse((Boolean) ((LinkedHashMap) response.getMappings().get("test2").get("type").getSourceAsMap().get("_all")).get("enabled"));
|
assertFalse((Boolean) ((LinkedHashMap) response.getMappings().get("test2").get("type").getSourceAsMap().get("_all")).get("enabled"));
|
||||||
assertNotNull(response.getMappings().get("test2").get("type").getSourceAsMap().get("_size"));
|
assertNotNull(response.getMappings().get("test2").get("type").getSourceAsMap().get("_timestamp"));
|
||||||
assertTrue((Boolean)((LinkedHashMap)response.getMappings().get("test2").get("type").getSourceAsMap().get("_size")).get("enabled"));
|
assertTrue((Boolean)((LinkedHashMap)response.getMappings().get("test2").get("type").getSourceAsMap().get("_timestamp")).get("enabled"));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1 +1 @@
|
||||||
{"type":{"_size":{"enabled":false},"_timestamp":{"enabled":false}}}
|
{"type":{"_timestamp":{"enabled":false}}}
|
|
@ -0,0 +1,52 @@
|
||||||
|
/*
|
||||||
|
* Licensed to Elasticsearch under one or more contributor
|
||||||
|
* license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright
|
||||||
|
* ownership. Elasticsearch licenses this file to you under
|
||||||
|
* the Apache License, Version 2.0 (the "License"); you may
|
||||||
|
* not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing,
|
||||||
|
* software distributed under the License is distributed on an
|
||||||
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
* KIND, either express or implied. See the License for the
|
||||||
|
* specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.elasticsearch.index.query;
|
||||||
|
|
||||||
|
import org.elasticsearch.action.search.SearchResponse;
|
||||||
|
import org.elasticsearch.test.ESSingleNodeTestCase;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
public class CommonTermsQueryParserTest extends ESSingleNodeTestCase {
|
||||||
|
@Test
|
||||||
|
public void testWhenParsedQueryIsNullNoNullPointerExceptionIsThrown() throws IOException {
|
||||||
|
final String index = "test-index";
|
||||||
|
final String type = "test-type";
|
||||||
|
client()
|
||||||
|
.admin()
|
||||||
|
.indices()
|
||||||
|
.prepareCreate(index)
|
||||||
|
.addMapping(type, "name", "type=string,analyzer=stop")
|
||||||
|
.execute()
|
||||||
|
.actionGet();
|
||||||
|
ensureGreen();
|
||||||
|
|
||||||
|
CommonTermsQueryBuilder commonTermsQueryBuilder =
|
||||||
|
new CommonTermsQueryBuilder("name", "the").queryName("query-name");
|
||||||
|
|
||||||
|
// the named query parses to null; we are testing that this does not cause a NullPointerException
|
||||||
|
SearchResponse response =
|
||||||
|
client().prepareSearch(index).setTypes(type).setQuery(commonTermsQueryBuilder).execute().actionGet();
|
||||||
|
|
||||||
|
assertNotNull(response);
|
||||||
|
assertEquals(response.getHits().hits().length, 0);
|
||||||
|
}
|
||||||
|
}
|
|
@ -54,6 +54,7 @@ import org.elasticsearch.common.xcontent.XContentFactory;
|
||||||
import org.elasticsearch.common.xcontent.XContentHelper;
|
import org.elasticsearch.common.xcontent.XContentHelper;
|
||||||
import org.elasticsearch.common.xcontent.XContentParser;
|
import org.elasticsearch.common.xcontent.XContentParser;
|
||||||
import org.elasticsearch.index.IndexService;
|
import org.elasticsearch.index.IndexService;
|
||||||
|
import org.elasticsearch.index.engine.Engine;
|
||||||
import org.elasticsearch.index.mapper.MapperService;
|
import org.elasticsearch.index.mapper.MapperService;
|
||||||
import org.elasticsearch.index.mapper.ParsedDocument;
|
import org.elasticsearch.index.mapper.ParsedDocument;
|
||||||
import org.elasticsearch.index.mapper.core.NumberFieldMapper;
|
import org.elasticsearch.index.mapper.core.NumberFieldMapper;
|
||||||
|
@ -83,6 +84,7 @@ import static org.hamcrest.Matchers.*;
|
||||||
public class SimpleIndexQueryParserTests extends ESSingleNodeTestCase {
|
public class SimpleIndexQueryParserTests extends ESSingleNodeTestCase {
|
||||||
|
|
||||||
private IndexQueryParserService queryParser;
|
private IndexQueryParserService queryParser;
|
||||||
|
private IndexService indexService;
|
||||||
|
|
||||||
@Before
|
@Before
|
||||||
public void setup() throws IOException {
|
public void setup() throws IOException {
|
||||||
|
@ -99,6 +101,7 @@ public class SimpleIndexQueryParserTests extends ESSingleNodeTestCase {
|
||||||
assertNotNull(doc.dynamicMappingsUpdate());
|
assertNotNull(doc.dynamicMappingsUpdate());
|
||||||
client().admin().indices().preparePutMapping("test").setType("person").setSource(doc.dynamicMappingsUpdate().toString()).get();
|
client().admin().indices().preparePutMapping("test").setType("person").setSource(doc.dynamicMappingsUpdate().toString()).get();
|
||||||
|
|
||||||
|
this.indexService = indexService;
|
||||||
queryParser = indexService.queryParserService();
|
queryParser = indexService.queryParserService();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2269,6 +2272,23 @@ public class SimpleIndexQueryParserTests extends ESSingleNodeTestCase {
|
||||||
assertThat(parsedQuery, instanceOf(BooleanQuery.class));
|
assertThat(parsedQuery, instanceOf(BooleanQuery.class));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testCrossFieldMultiMatchQuery() throws IOException {
|
||||||
|
IndexQueryParserService queryParser = queryParser();
|
||||||
|
Query parsedQuery = queryParser.parse(multiMatchQuery("banon", "name.first^2", "name.last^3", "foobar").type(MultiMatchQueryBuilder.Type.CROSS_FIELDS)).query();
|
||||||
|
try (Engine.Searcher searcher = indexService.shardSafe(0).acquireSearcher("test")) {
|
||||||
|
Query rewrittenQuery = searcher.searcher().rewrite(parsedQuery);
|
||||||
|
|
||||||
|
BooleanQuery expected = new BooleanQuery();
|
||||||
|
expected.add(new TermQuery(new Term("foobar", "banon")), Occur.SHOULD);
|
||||||
|
TermQuery tq1 = new TermQuery(new Term("name.first", "banon"));
|
||||||
|
tq1.setBoost(2);
|
||||||
|
TermQuery tq2 = new TermQuery(new Term("name.last", "banon"));
|
||||||
|
tq2.setBoost(3);
|
||||||
|
expected.add(new DisjunctionMaxQuery(Arrays.<Query>asList(tq1, tq2), 0f), Occur.SHOULD);
|
||||||
|
assertEquals(expected, rewrittenQuery);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testSimpleQueryString() throws Exception {
|
public void testSimpleQueryString() throws Exception {
|
||||||
IndexQueryParserService queryParser = queryParser();
|
IndexQueryParserService queryParser = queryParser();
|
||||||
|
|
|
@ -506,37 +506,6 @@ public class PercolatorIT extends ESIntegTestCase {
|
||||||
assertThat(percolate.getMatches(), emptyArray());
|
assertThat(percolate.getMatches(), emptyArray());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
|
||||||
public void percolateWithSizeField() throws Exception {
|
|
||||||
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type1")
|
|
||||||
.startObject("_size").field("enabled", true).endObject()
|
|
||||||
.startObject("properties").startObject("field1").field("type", "string").endObject().endObject()
|
|
||||||
.endObject().endObject().string();
|
|
||||||
|
|
||||||
assertAcked(prepareCreate("test").addMapping("type1", mapping));
|
|
||||||
ensureGreen();
|
|
||||||
|
|
||||||
logger.info("--> register a query");
|
|
||||||
client().prepareIndex("test", PercolatorService.TYPE_NAME, "kuku")
|
|
||||||
.setSource(jsonBuilder().startObject()
|
|
||||||
.field("query", termQuery("field1", "value1"))
|
|
||||||
.endObject())
|
|
||||||
.setRefresh(true)
|
|
||||||
.execute().actionGet();
|
|
||||||
|
|
||||||
logger.info("--> percolate a document");
|
|
||||||
PercolateResponse percolate = client().preparePercolate().setIndices("test").setDocumentType("type1")
|
|
||||||
.setSource(jsonBuilder().startObject()
|
|
||||||
.startObject("doc")
|
|
||||||
.field("field1", "value1")
|
|
||||||
.endObject()
|
|
||||||
.endObject())
|
|
||||||
.execute().actionGet();
|
|
||||||
assertMatchCount(percolate, 1l);
|
|
||||||
assertThat(percolate.getMatches(), arrayWithSize(1));
|
|
||||||
assertThat(convertFromTextArray(percolate.getMatches(), "test"), arrayContaining("kuku"));
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testPercolateStatistics() throws Exception {
|
public void testPercolateStatistics() throws Exception {
|
||||||
client().admin().indices().prepareCreate("test").execute().actionGet();
|
client().admin().indices().prepareCreate("test").execute().actionGet();
|
||||||
|
|
|
@ -97,6 +97,11 @@ public class NativeScriptTests extends ESTestCase {
|
||||||
public ExecutableScript newScript(@Nullable Map<String, Object> params) {
|
public ExecutableScript newScript(@Nullable Map<String, Object> params) {
|
||||||
return new MyScript();
|
return new MyScript();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean needsScores() {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static class MyScript extends AbstractExecutableScript {
|
static class MyScript extends AbstractExecutableScript {
|
||||||
|
|
|
@ -81,6 +81,11 @@ public class ScriptFieldIT extends ESIntegTestCase {
|
||||||
public ExecutableScript newScript(@Nullable Map<String, Object> params) {
|
public ExecutableScript newScript(@Nullable Map<String, Object> params) {
|
||||||
return new IntScript();
|
return new IntScript();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean needsScores() {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static class IntScript extends AbstractSearchScript {
|
static class IntScript extends AbstractSearchScript {
|
||||||
|
@ -95,6 +100,11 @@ public class ScriptFieldIT extends ESIntegTestCase {
|
||||||
public ExecutableScript newScript(@Nullable Map<String, Object> params) {
|
public ExecutableScript newScript(@Nullable Map<String, Object> params) {
|
||||||
return new LongScript();
|
return new LongScript();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean needsScores() {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static class LongScript extends AbstractSearchScript {
|
static class LongScript extends AbstractSearchScript {
|
||||||
|
@ -109,6 +119,11 @@ public class ScriptFieldIT extends ESIntegTestCase {
|
||||||
public ExecutableScript newScript(@Nullable Map<String, Object> params) {
|
public ExecutableScript newScript(@Nullable Map<String, Object> params) {
|
||||||
return new FloatScript();
|
return new FloatScript();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean needsScores() {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static class FloatScript extends AbstractSearchScript {
|
static class FloatScript extends AbstractSearchScript {
|
||||||
|
@ -123,6 +138,11 @@ public class ScriptFieldIT extends ESIntegTestCase {
|
||||||
public ExecutableScript newScript(@Nullable Map<String, Object> params) {
|
public ExecutableScript newScript(@Nullable Map<String, Object> params) {
|
||||||
return new DoubleScript();
|
return new DoubleScript();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean needsScores() {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static class DoubleScript extends AbstractSearchScript {
|
static class DoubleScript extends AbstractSearchScript {
|
||||||
|
|
|
@ -0,0 +1,57 @@
|
||||||
|
/*
|
||||||
|
* Licensed to Elasticsearch under one or more contributor
|
||||||
|
* license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright
|
||||||
|
* ownership. Elasticsearch licenses this file to you under
|
||||||
|
* the Apache License, Version 2.0 (the "License"); you may
|
||||||
|
* not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing,
|
||||||
|
* software distributed under the License is distributed on an
|
||||||
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
* KIND, either express or implied. See the License for the
|
||||||
|
* specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.elasticsearch.script.expression;
|
||||||
|
|
||||||
|
import org.elasticsearch.common.settings.Settings;
|
||||||
|
import org.elasticsearch.index.IndexService;
|
||||||
|
import org.elasticsearch.script.CompiledScript;
|
||||||
|
import org.elasticsearch.script.ScriptService.ScriptType;
|
||||||
|
import org.elasticsearch.script.SearchScript;
|
||||||
|
import org.elasticsearch.search.lookup.SearchLookup;
|
||||||
|
import org.elasticsearch.test.ESSingleNodeTestCase;
|
||||||
|
|
||||||
|
import java.util.Collections;
|
||||||
|
|
||||||
|
public class ExpressionScriptTests extends ESSingleNodeTestCase {
|
||||||
|
|
||||||
|
public void testNeedsScores() {
|
||||||
|
IndexService index = createIndex("test", Settings.EMPTY, "type", "d", "type=double");
|
||||||
|
|
||||||
|
ExpressionScriptEngineService service = new ExpressionScriptEngineService(Settings.EMPTY);
|
||||||
|
SearchLookup lookup = new SearchLookup(index.mapperService(), index.fieldData(), null);
|
||||||
|
|
||||||
|
Object compiled = service.compile("1.2");
|
||||||
|
SearchScript ss = service.search(new CompiledScript(ScriptType.INLINE, "randomName", "expression", compiled), lookup, Collections.<String, Object>emptyMap());
|
||||||
|
assertFalse(ss.needsScores());
|
||||||
|
|
||||||
|
compiled = service.compile("doc['d'].value");
|
||||||
|
ss = service.search(new CompiledScript(ScriptType.INLINE, "randomName", "expression", compiled), lookup, Collections.<String, Object>emptyMap());
|
||||||
|
assertFalse(ss.needsScores());
|
||||||
|
|
||||||
|
compiled = service.compile("1/_score");
|
||||||
|
ss = service.search(new CompiledScript(ScriptType.INLINE, "randomName", "expression", compiled), lookup, Collections.<String, Object>emptyMap());
|
||||||
|
assertTrue(ss.needsScores());
|
||||||
|
|
||||||
|
compiled = service.compile("doc['d'].value * _score");
|
||||||
|
ss = service.search(new CompiledScript(ScriptType.INLINE, "randomName", "expression", compiled), lookup, Collections.<String, Object>emptyMap());
|
||||||
|
assertTrue(ss.needsScores());
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -35,6 +35,11 @@ public class NativeSignificanceScoreScriptNoParams extends TestScript {
|
||||||
public ExecutableScript newScript(@Nullable Map<String, Object> params) {
|
public ExecutableScript newScript(@Nullable Map<String, Object> params) {
|
||||||
return new NativeSignificanceScoreScriptNoParams();
|
return new NativeSignificanceScoreScriptNoParams();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean needsScores() {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private NativeSignificanceScoreScriptNoParams() {
|
private NativeSignificanceScoreScriptNoParams() {
|
||||||
|
|
|
@ -36,6 +36,11 @@ public class NativeSignificanceScoreScriptWithParams extends TestScript {
|
||||||
public ExecutableScript newScript(@Nullable Map<String, Object> params) {
|
public ExecutableScript newScript(@Nullable Map<String, Object> params) {
|
||||||
return new NativeSignificanceScoreScriptWithParams(params);
|
return new NativeSignificanceScoreScriptWithParams(params);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean needsScores() {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private NativeSignificanceScoreScriptWithParams(Map<String, Object> params) {
|
private NativeSignificanceScoreScriptWithParams(Map<String, Object> params) {
|
||||||
|
|
|
@ -76,9 +76,8 @@ public class SearchFieldsIT extends ESIntegTestCase {
|
||||||
client().admin().cluster().prepareHealth().setWaitForEvents(Priority.LANGUID).setWaitForYellowStatus().execute().actionGet();
|
client().admin().cluster().prepareHealth().setWaitForEvents(Priority.LANGUID).setWaitForYellowStatus().execute().actionGet();
|
||||||
|
|
||||||
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type1")
|
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type1")
|
||||||
// _timestamp and _size are randomly enabled via templates but we don't want it here to test stored fields behaviour
|
// _timestamp is randomly enabled via templates but we don't want it here to test stored fields behaviour
|
||||||
.startObject("_timestamp").field("enabled", false).endObject()
|
.startObject("_timestamp").field("enabled", false).endObject()
|
||||||
.startObject("_size").field("enabled", false).endObject()
|
|
||||||
.startObject("properties")
|
.startObject("properties")
|
||||||
.startObject("field1").field("type", "string").field("store", "yes").endObject()
|
.startObject("field1").field("type", "string").field("store", "yes").endObject()
|
||||||
.startObject("field2").field("type", "string").field("store", "no").endObject()
|
.startObject("field2").field("type", "string").field("store", "no").endObject()
|
||||||
|
|
|
@ -102,6 +102,10 @@ public class ExplainableScriptIT extends ESIntegTestCase {
|
||||||
public ExecutableScript newScript(@Nullable Map<String, Object> params) {
|
public ExecutableScript newScript(@Nullable Map<String, Object> params) {
|
||||||
return new MyScript();
|
return new MyScript();
|
||||||
}
|
}
|
||||||
|
@Override
|
||||||
|
public boolean needsScores() {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static class MyScript extends AbstractDoubleSearchScript implements ExplainableSearchScript, ExecutableScript {
|
static class MyScript extends AbstractDoubleSearchScript implements ExplainableSearchScript, ExecutableScript {
|
||||||
|
|
|
@ -102,7 +102,6 @@ import org.elasticsearch.index.fielddata.FieldDataType;
|
||||||
import org.elasticsearch.index.mapper.DocumentMapper;
|
import org.elasticsearch.index.mapper.DocumentMapper;
|
||||||
import org.elasticsearch.index.mapper.MappedFieldType;
|
import org.elasticsearch.index.mapper.MappedFieldType;
|
||||||
import org.elasticsearch.index.mapper.MappedFieldType.Loading;
|
import org.elasticsearch.index.mapper.MappedFieldType.Loading;
|
||||||
import org.elasticsearch.index.mapper.internal.SizeFieldMapper;
|
|
||||||
import org.elasticsearch.index.mapper.internal.TimestampFieldMapper;
|
import org.elasticsearch.index.mapper.internal.TimestampFieldMapper;
|
||||||
import org.elasticsearch.index.shard.MergePolicyConfig;
|
import org.elasticsearch.index.shard.MergePolicyConfig;
|
||||||
import org.elasticsearch.index.translog.Translog;
|
import org.elasticsearch.index.translog.Translog;
|
||||||
|
@ -357,11 +356,6 @@ public abstract class ESIntegTestCase extends ESTestCase {
|
||||||
.field("enabled", randomBoolean());
|
.field("enabled", randomBoolean());
|
||||||
mappings.endObject();
|
mappings.endObject();
|
||||||
}
|
}
|
||||||
if (randomBoolean()) {
|
|
||||||
mappings.startObject(SizeFieldMapper.NAME)
|
|
||||||
.field("enabled", randomBoolean())
|
|
||||||
.endObject();
|
|
||||||
}
|
|
||||||
mappings.startArray("dynamic_templates")
|
mappings.startArray("dynamic_templates")
|
||||||
.startObject()
|
.startObject()
|
||||||
.startObject("template-strings")
|
.startObject("template-strings")
|
||||||
|
|
|
@ -74,6 +74,10 @@ public class UpdateByNativeScriptIT extends ESIntegTestCase {
|
||||||
public ExecutableScript newScript(@Nullable Map<String, Object> params) {
|
public ExecutableScript newScript(@Nullable Map<String, Object> params) {
|
||||||
return new CustomScript(params);
|
return new CustomScript(params);
|
||||||
}
|
}
|
||||||
|
@Override
|
||||||
|
public boolean needsScores() {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static class CustomScript extends AbstractExecutableScript {
|
static class CustomScript extends AbstractExecutableScript {
|
||||||
|
|
|
@ -53,7 +53,7 @@ Response:
|
||||||
}
|
}
|
||||||
--------------------------------------------------
|
--------------------------------------------------
|
||||||
|
|
||||||
The specified field must be of type `geo_point` (which can only be set explicitly in the mappings). And it can also hold an array of `geo_point` fields, in which case all will be taken into account during aggregation. The origin point can accept all formats supported by the `geo_point` <<mapping-geo-point-type,type>>:
|
The specified field must be of type `geo_point` (which can only be set explicitly in the mappings). And it can also hold an array of `geo_point` fields, in which case all will be taken into account during aggregation. The origin point can accept all formats supported by the <<geo-point,`geo_point` type>>:
|
||||||
|
|
||||||
* Object format: `{ "lat" : 52.3760, "lon" : 4.894 }` - this is the safest format as it is the most explicit about the `lat` & `lon` values
|
* Object format: `{ "lat" : 52.3760, "lon" : 4.894 }` - this is the safest format as it is the most explicit about the `lat` & `lon` values
|
||||||
* String format: `"52.3760, 4.894"` - where the first number is the `lat` and the second is the `lon`
|
* String format: `"52.3760, 4.894"` - where the first number is the `lat` and the second is the `lon`
|
||||||
|
|
|
@ -200,7 +200,7 @@ and therefore can't be used in the `order` option of the `terms` aggregator.
|
||||||
If the `top_hits` aggregator is wrapped in a `nested` or `reverse_nested` aggregator then nested hits are being returned.
|
If the `top_hits` aggregator is wrapped in a `nested` or `reverse_nested` aggregator then nested hits are being returned.
|
||||||
Nested hits are in a sense hidden mini documents that are part of a regular document where in the mapping a nested field type
|
Nested hits are in a sense hidden mini documents that are part of a regular document where in the mapping a nested field type
|
||||||
has been configured. The `top_hits` aggregator has the ability to un-hide these documents if it is wrapped in a `nested`
|
has been configured. The `top_hits` aggregator has the ability to un-hide these documents if it is wrapped in a `nested`
|
||||||
or `reverse_nested` aggregator. Read more about nested in the <<mapping-nested-type,nested type mapping>>.
|
or `reverse_nested` aggregator. Read more about nested in the <<nested,nested type mapping>>.
|
||||||
|
|
||||||
If a nested type has been configured, a single document is actually indexed as multiple Lucene documents and they share
|
If a nested type has been configured, a single document is actually indexed as multiple Lucene documents and they share
|
||||||
the same id. In order to determine the identity of a nested hit, more is needed than just the id, so that is why
|
the same id. In order to determine the identity of a nested hit, more is needed than just the id, so that is why
|
||||||
|
|
|
@ -152,6 +152,34 @@ being consumed by a monitoring tool, rather than intended for human
|
||||||
consumption. The default for the `human` flag is
|
consumption. The default for the `human` flag is
|
||||||
`false`.
|
`false`.
|
||||||
|
|
||||||
|
[[date-math]]
|
||||||
|
[float]
|
||||||
|
=== Date Math
|
||||||
|
|
||||||
|
Most parameters which accept a formatted date value -- such as `gt` and `lt`
|
||||||
|
in <<query-dsl-range-query,`range` queries>>, or `from` and `to`
|
||||||
|
in <<search-aggregations-bucket-daterange-aggregation,`daterange`
|
||||||
|
aggregations>> -- understand date maths.
|
||||||
|
|
||||||
|
The expression starts with an anchor date, which can either be `now`, or a
|
||||||
|
date string ending with `||`. This anchor date can optionally be followed by
|
||||||
|
one or more maths expressions:
|
||||||
|
|
||||||
|
* `+1h` - add one hour
|
||||||
|
* `-1d` - subtract one day
|
||||||
|
* `/d` - round down to the nearest day
|
||||||
|
|
||||||
|
The supported <<time-units,time units>> are: `y` (year), `M` (month), `w` (week),
|
||||||
|
`d` (day), `h` (hour), `m` (minute), and `s` (second).
|
||||||
|
|
||||||
|
Some examples are:
|
||||||
|
|
||||||
|
[horizontal]
|
||||||
|
`now+1h`:: The current time plus one hour, with ms resolution.
|
||||||
|
`now+1h+1m`:: The current time plus one hour plus one minute, with ms resolution.
|
||||||
|
`now+1h/d`:: The current time plus one hour, rounded down to the nearest day.
|
||||||
|
`2015-01-01||+1M/d`:: `2015-01-01` plus one month, rounded down to the nearest day.
|
||||||
|
|
||||||
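To make the arithmetic and rounding rules above concrete, the following is a minimal, hypothetical Java sketch (not Elasticsearch's own date-math parser); it only handles the `now` anchor and the `h` and `d` units, but it shows how an expression such as `now+1h/d` is applied step by step:

[source,java]
--------------------------------------------------
import java.time.ZonedDateTime;
import java.time.temporal.ChronoUnit;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class DateMathSketch {

    // Matches one math step: "+1h", "-2d", or the rounding step "/d".
    private static final Pattern STEP = Pattern.compile("([+-]\\d+|/)([hd])");

    public static ZonedDateTime eval(String expression, ZonedDateTime now) {
        if (!expression.startsWith("now")) {
            throw new IllegalArgumentException("this sketch only supports the 'now' anchor");
        }
        ZonedDateTime value = now;
        Matcher step = STEP.matcher(expression.substring("now".length()));
        while (step.find()) {
            ChronoUnit unit = "d".equals(step.group(2)) ? ChronoUnit.DAYS : ChronoUnit.HOURS;
            if ("/".equals(step.group(1))) {
                value = value.truncatedTo(unit);                         // "/d" rounds down
            } else {
                value = value.plus(Long.parseLong(step.group(1)), unit); // "+1h", "-1d"
            }
        }
        return value;
    }

    public static void main(String[] args) {
        ZonedDateTime now = ZonedDateTime.now();
        System.out.println(eval("now+1h/d", now)); // now plus one hour, rounded down to the day
    }
}
--------------------------------------------------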
[float]
|
[float]
|
||||||
=== Response Filtering
|
=== Response Filtering
|
||||||
|
|
||||||
|
@ -237,10 +265,10 @@ curl 'localhost:9200/_segments?pretty&filter_path=indices.**.version'
|
||||||
--------------------------------------------------
|
--------------------------------------------------
|
||||||
|
|
||||||
Note that elasticsearch sometimes directly returns the raw value of a field,
|
Note that elasticsearch sometimes directly returns the raw value of a field,
|
||||||
like the `_source` field. If you want to filter _source fields, you should
|
like the `_source` field. If you want to filter `_source` fields, you should
|
||||||
consider combining the already existing `_source` parameter (see
|
consider combining the already existing `_source` parameter (see
|
||||||
<<get-source-filtering,Get API>> for more details) with the `filter_path`
|
<<get-source-filtering,Get API>> for more details) with the `filter_path`
|
||||||
parameter like this:
|
parameter like this:
|
||||||
|
|
||||||
[source,sh]
|
[source,sh]
|
||||||
--------------------------------------------------
|
--------------------------------------------------
|
||||||
|
@ -318,8 +346,9 @@ of supporting the native JSON number types.
|
||||||
[float]
|
[float]
|
||||||
=== Time units
|
=== Time units
|
||||||
|
|
||||||
Whenever durations need to be specified, eg for a `timeout` parameter, the duration
|
Whenever durations need to be specified, eg for a `timeout` parameter, the
|
||||||
can be specified as a whole number representing time in milliseconds, or as a time value like `2d` for 2 days. The supported units are:
|
duration must specify the unit, like `2d` for 2 days. The supported units
|
||||||
|
are:
|
||||||
|
|
||||||
[horizontal]
|
[horizontal]
|
||||||
`y`:: Year
|
`y`:: Year
|
||||||
|
@ -329,6 +358,7 @@ can be specified as a whole number representing time in milliseconds, or as a ti
|
||||||
`h`:: Hour
|
`h`:: Hour
|
||||||
`m`:: Minute
|
`m`:: Minute
|
||||||
`s`:: Second
|
`s`:: Second
|
||||||
|
`ms`:: Millisecond
|
||||||
|
|
||||||
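In code, the `TimeValue` helper used by the tests in this commit expresses the same unit-qualified durations; a small illustrative sketch:

[source,java]
--------------------------------------------------
import org.elasticsearch.common.unit.TimeValue;

public class TimeUnitsExample {
    public static void main(String[] args) {
        // "10h" and "1m" expressed through TimeValue, as in the allocation tests above
        System.out.println(TimeValue.timeValueHours(10).millis());  // 36000000
        System.out.println(TimeValue.timeValueMinutes(1).millis()); // 60000
    }
}
--------------------------------------------------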
[[distance-units]]
|
[[distance-units]]
|
||||||
[float]
|
[float]
|
||||||
|
|
|
@ -6,53 +6,3 @@ added to an index either when creating it or by using the put mapping
|
||||||
api. It also handles the dynamic mapping support for types that have no
|
api. It also handles the dynamic mapping support for types that have no
|
||||||
explicit mappings predefined. For more information about mapping
|
explicit mappings predefined. For more information about mapping
|
||||||
definitions, check out the <<mapping,mapping section>>.
|
definitions, check out the <<mapping,mapping section>>.
|
||||||
|
|
||||||
[float]
|
|
||||||
=== Dynamic Mappings
|
|
||||||
|
|
||||||
New types and new fields within types can be added dynamically just
|
|
||||||
by indexing a document. When Elasticsearch encounters a new type,
|
|
||||||
it creates the type using the `_default_` mapping (see below).
|
|
||||||
|
|
||||||
When it encounters a new field within a type, it autodetects the
|
|
||||||
datatype that the field contains and adds it to the type mapping
|
|
||||||
automatically.
|
|
||||||
|
|
||||||
See <<mapping-dynamic-mapping>> for details of how to control and
|
|
||||||
configure dynamic mapping.
|
|
||||||
|
|
||||||
[float]
|
|
||||||
=== Default Mapping
|
|
||||||
|
|
||||||
When a new type is created (at <<indices-create-index,index creation>> time,
|
|
||||||
using the <<indices-put-mapping,`put-mapping` API>> or just by indexing a
|
|
||||||
document into it), the type uses the `_default_` mapping as its basis. Any
|
|
||||||
mapping specified in the <<indices-create-index,`create-index`>> or
|
|
||||||
<<indices-put-mapping,`put-mapping`>> request override values set in the
|
|
||||||
`_default_` mapping.
|
|
||||||
|
|
||||||
The default mapping definition is a plain mapping definition that is
|
|
||||||
embedded within Elasticsearch:
|
|
||||||
|
|
||||||
[source,js]
|
|
||||||
--------------------------------------------------
|
|
||||||
{
|
|
||||||
_default_ : {
|
|
||||||
}
|
|
||||||
}
|
|
||||||
--------------------------------------------------
|
|
||||||
|
|
||||||
Pretty short, isn't it? Basically, everything is `_default_`ed, including the
|
|
||||||
dynamic nature of the root object mapping which allows new fields to be added
|
|
||||||
automatically.
|
|
||||||
|
|
||||||
The default mapping can be overridden by specifying the `_default_` type when
|
|
||||||
creating a new index.
|
|
||||||
|
|
||||||
[float]
|
|
||||||
=== Mapper settings
|
|
||||||
|
|
||||||
`index.mapper.dynamic` (_dynamic_)::
|
|
||||||
|
|
||||||
Dynamic creation of mappings for unmapped types can be completely
|
|
||||||
disabled by setting `index.mapper.dynamic` to `false`.
|
|
||||||
|
|
|
@ -6,8 +6,8 @@ are scored. Similarity is per field, meaning that via the mapping one
|
||||||
can define a different similarity per field.
|
can define a different similarity per field.
|
||||||
|
|
||||||
Configuring a custom similarity is considered an expert feature and the
|
Configuring a custom similarity is considered an expert feature and the
|
||||||
builtin similarities are most likely sufficient as is described in the
|
builtin similarities are most likely sufficient as is described in
|
||||||
<<mapping-core-types,mapping section>>
|
<<similarity>>.
|
||||||
|
|
||||||
[float]
|
[float]
|
||||||
[[configuration]]
|
[[configuration]]
|
||||||
|
@ -41,7 +41,7 @@ Here we configure the DFRSimilarity so it can be referenced as
|
||||||
"properties" : {
|
"properties" : {
|
||||||
"title" : { "type" : "string", "similarity" : "my_similarity" }
|
"title" : { "type" : "string", "similarity" : "my_similarity" }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
--------------------------------------------------
|
--------------------------------------------------
|
||||||
|
|
||||||
[float]
|
[float]
|
||||||
|
@ -52,9 +52,9 @@ Here we configure the DFRSimilarity so it can be referenced as
|
||||||
==== Default similarity
|
==== Default similarity
|
||||||
|
|
||||||
The default similarity is based on the TF/IDF model. This
|
The default similarity is based on the TF/IDF model. This
|
||||||
similarity has the following option:
|
similarity has the following option:
|
||||||
|
|
||||||
`discount_overlaps`::
|
`discount_overlaps`::
|
||||||
Determines whether overlap tokens (Tokens with
|
Determines whether overlap tokens (Tokens with
|
||||||
0 position increment) are ignored when computing norm. By default this
|
0 position increment) are ignored when computing norm. By default this
|
||||||
is true, meaning overlap tokens do not count when computing norms.
|
is true, meaning overlap tokens do not count when computing norms.
|
||||||
|
@ -71,14 +71,14 @@ http://en.wikipedia.org/wiki/Okapi_BM25[Okapi_BM25] for more details.
|
||||||
This similarity has the following options:
|
This similarity has the following options:
|
||||||
|
|
||||||
[horizontal]
|
[horizontal]
|
||||||
`k1`::
|
`k1`::
|
||||||
Controls non-linear term frequency normalization
|
Controls non-linear term frequency normalization
|
||||||
(saturation).
|
(saturation).
|
||||||
|
|
||||||
`b`::
|
`b`::
|
||||||
Controls to what degree document length normalizes tf values.
|
Controls to what degree document length normalizes tf values.
|
||||||
|
|
||||||
`discount_overlaps`::
|
`discount_overlaps`::
|
||||||
Determines whether overlap tokens (Tokens with
|
Determines whether overlap tokens (Tokens with
|
||||||
0 position increment) are ignored when computing norm. By default this
|
0 position increment) are ignored when computing norm. By default this
|
||||||
is true, meaning overlap tokens do not count when computing norms.
|
is true, meaning overlap tokens do not count when computing norms.
|
||||||
|
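For reference, the textbook BM25 term weight below shows where `k1` and `b` enter the score; the Lucene implementation differs in details such as how document-length norms are encoded, so treat this as a reference formula rather than the exact code path:

[source,latex]
--------------------------------------------------
\mathrm{score}(q, d) = \sum_{t \in q} \mathrm{IDF}(t) \cdot
    \frac{tf(t, d) \cdot (k_1 + 1)}
         {tf(t, d) + k_1 \cdot \left(1 - b + b \cdot \frac{|d|}{\mathrm{avgdl}}\right)}
--------------------------------------------------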
@ -90,17 +90,17 @@ Type name: `BM25`
|
||||||
==== DFR similarity
|
==== DFR similarity
|
||||||
|
|
||||||
Similarity that implements the
|
Similarity that implements the
|
||||||
http://lucene.apache.org/core/4_1_0/core/org/apache/lucene/search/similarities/DFRSimilarity.html[divergence
|
http://lucene.apache.org/core/5_2_1/core/org/apache/lucene/search/similarities/DFRSimilarity.html[divergence
|
||||||
from randomness] framework. This similarity has the following options:
|
from randomness] framework. This similarity has the following options:
|
||||||
|
|
||||||
[horizontal]
|
[horizontal]
|
||||||
`basic_model`::
|
`basic_model`::
|
||||||
Possible values: `be`, `d`, `g`, `if`, `in`, `ine` and `p`.
|
Possible values: `be`, `d`, `g`, `if`, `in`, `ine` and `p`.
|
||||||
|
|
||||||
`after_effect`::
|
`after_effect`::
|
||||||
Possible values: `no`, `b` and `l`.
|
Possible values: `no`, `b` and `l`.
|
||||||
|
|
||||||
`normalization`::
|
`normalization`::
|
||||||
Possible values: `no`, `h1`, `h2`, `h3` and `z`.
|
Possible values: `no`, `h1`, `h2`, `h3` and `z`.
|
||||||
|
|
||||||
All options but the first option need a normalization value.
|
All options but the first option need a normalization value.
|
||||||
|
@ -111,12 +111,12 @@ Type name: `DFR`
|
||||||
[[ib]]
|
[[ib]]
|
||||||
==== IB similarity.
|
==== IB similarity.
|
||||||
|
|
||||||
http://lucene.apache.org/core/4_1_0/core/org/apache/lucene/search/similarities/IBSimilarity.html[Information
|
http://lucene.apache.org/core/5_2_1/core/org/apache/lucene/search/similarities/IBSimilarity.html[Information
|
||||||
based model] . This similarity has the following options:
|
based model] . This similarity has the following options:
|
||||||
|
|
||||||
[horizontal]
|
[horizontal]
|
||||||
`distribution`:: Possible values: `ll` and `spl`.
|
`distribution`:: Possible values: `ll` and `spl`.
|
||||||
`lambda`:: Possible values: `df` and `ttf`.
|
`lambda`:: Possible values: `df` and `ttf`.
|
||||||
`normalization`:: Same as in `DFR` similarity.
|
`normalization`:: Same as in `DFR` similarity.
|
||||||
|
|
||||||
Type name: `IB`
|
Type name: `IB`
|
||||||
|
@ -125,7 +125,7 @@ Type name: `IB`
|
||||||
[[lm_dirichlet]]
|
[[lm_dirichlet]]
|
||||||
==== LM Dirichlet similarity.
|
==== LM Dirichlet similarity.
|
||||||
|
|
||||||
http://lucene.apache.org/core/4_7_1/core/org/apache/lucene/search/similarities/LMDirichletSimilarity.html[LM
|
http://lucene.apache.org/core/5_2_1/core/org/apache/lucene/search/similarities/LMDirichletSimilarity.html[LM
|
||||||
Dirichlet similarity] . This similarity has the following options:
|
Dirichlet similarity] . This similarity has the following options:
|
||||||
|
|
||||||
[horizontal]
|
[horizontal]
|
||||||
|
@ -137,7 +137,7 @@ Type name: `LMDirichlet`
|
||||||
[[lm_jelinek_mercer]]
|
[[lm_jelinek_mercer]]
|
||||||
==== LM Jelinek Mercer similarity.
|
==== LM Jelinek Mercer similarity.
|
||||||
|
|
||||||
http://lucene.apache.org/core/4_7_1/core/org/apache/lucene/search/similarities/LMJelinekMercerSimilarity.html[LM
|
http://lucene.apache.org/core/5_2_1/core/org/apache/lucene/search/similarities/LMJelinekMercerSimilarity.html[LM
|
||||||
Jelinek Mercer similarity] . This similarity has the following options:
|
Jelinek Mercer similarity] . This similarity has the following options:
|
||||||
|
|
||||||
[horizontal]
|
[horizontal]
|
||||||
|
|
|
@ -3,76 +3,173 @@
|
||||||
|
|
||||||
[partintro]
|
[partintro]
|
||||||
--
|
--
|
||||||
Mapping is the process of defining how a document should be mapped to
|
|
||||||
the Search Engine, including its searchable characteristics such as
|
|
||||||
which fields are searchable and if/how they are tokenized. In
|
|
||||||
Elasticsearch, an index may store documents of different "mapping
|
|
||||||
types". Elasticsearch allows one to associate multiple mapping
|
|
||||||
definitions for each mapping type.
|
|
||||||
|
|
||||||
Explicit mapping is defined on an index/type level. By default, there
|
Mapping is the process of defining how a document, and the fields it contains,
|
||||||
isn't a need to define an explicit mapping, since one is automatically
|
are stored and indexed. For instance, use mappings to define:
|
||||||
created and registered when a new type or new field is introduced (with
|
|
||||||
no performance overhead) and have sensible defaults. Only when the
|
* which string fields should be treated as full text fields.
|
||||||
defaults need to be overridden must a mapping definition be provided.
|
* which fields contain numbers, dates, or geolocations.
|
||||||
|
* whether the values of all fields in the document should be
|
||||||
|
indexed into the catch-all <<mapping-all-field,`_all`>> field.
|
||||||
|
* the <<mapping-date-format,format>> of date values.
|
||||||
|
* custom rules to control the mapping for
|
||||||
|
<<dynamic-mapping,dynamically added fields>>.
|
||||||
|
|
||||||
[float]
|
[float]
|
||||||
[[all-mapping-types]]
|
[[mapping-type]]
|
||||||
=== Mapping Types
|
== Mapping Types
|
||||||
|
|
||||||
Mapping types are a way to divide the documents in an index into logical
|
Each index has one or more _mapping types_, which are used to divide the
|
||||||
groups. Think of it as tables in a database. Though there is separation
|
documents in an index into logical groups. User documents might be stored in a
|
||||||
between types, it's not a full separation (all end up as a document
|
`user` type, and blog posts in a `blogpost` type.
|
||||||
within the same Lucene index).
|
|
||||||
|
|
||||||
Field names with the same name across types are highly recommended to
|
Each mapping type has:
|
||||||
have the same type and same mapping characteristics (analysis settings
|
|
||||||
for example). There is an effort to allow to explicitly "choose" which
|
<<mapping-fields,Meta-fields>>::
|
||||||
field to use by using type prefix (`my_type.my_field`), but it's not
|
|
||||||
complete, and there are places where it will never work (like
|
Meta-fields are used to customize how a document's associated metadata is
|
||||||
aggregations on the field).
|
treated. Examples of meta-fields include the document's
|
||||||
|
<<mapping-index-field,`_index`>>, <<mapping-type-field,`_type`>>,
|
||||||
|
<<mapping-id-field,`_id`>>, and <<mapping-source-field,`_source`>> fields.
|
||||||
|
|
||||||
|
<<mapping-types,Fields>> or _properties_::
|
||||||
|
|
||||||
|
Each mapping type contains a list of fields or `properties` pertinent to that
|
||||||
|
type. A `user` type might contain `title`, `name`, and `age` fields, while a
|
||||||
|
`blogpost` type might contain `title`, `body`, `user_id` and `created` fields.
|
||||||
|
Fields with the same name in different mapping types in the same index
|
||||||
|
<<field-conflicts,must have the same mapping>>.
|
||||||
|
|
||||||
In practice though, this restriction is almost never an issue. The field
|
|
||||||
name usually ends up being a good indication to its "typeness" (e.g.
|
|
||||||
"first_name" will always be a string). Note also, that this does not
|
|
||||||
apply to the cross index case.
|
|
||||||
|
|
||||||
[float]
|
[float]
|
||||||
[[mapping-api]]
|
== Field datatypes
|
||||||
=== Mapping API
|
|
||||||
|
|
||||||
To create a mapping, you will need the <<indices-put-mapping,Put Mapping
|
Each field has a data `type` which can be:
|
||||||
API>>, or you can add multiple mappings when you <<indices-create-index,create an
|
|
||||||
index>>.
|
* a simple type like <<string,`string`>>, <<date,`date`>>, <<number,`long`>>,
|
||||||
|
<<number,`double`>>, <<boolean,`boolean`>> or <<ip,`ip`>>.
|
||||||
|
* a type which supports the hierarchical nature of JSON such as
|
||||||
|
<<object,`object`>> or <<nested,`nested`>>.
|
||||||
|
* or a specialised type like <<geo-point,`geo_point`>>,
|
||||||
|
<<geo-shape,`geo_shape`>>, or <<search-suggesters-completion,`completion`>>.
|
||||||
|
|
||||||
|
It is often useful to index the same field in different ways for different
|
||||||
|
purposes. For instance, a `string` field could be <<mapping-index,indexed>> as
|
||||||
|
an `analyzed` field for full-text search, and as a `not_analyzed` field for
|
||||||
|
sorting or aggregations. Alternatively, you could index a string field with
|
||||||
|
the <<analysis-standard-analyzer,`standard` analyzer>>, the
|
||||||
|
<<english-analyzer,`english`>> analyzer, and the
|
||||||
|
<<french-analyzer,`french` analyzer>>.
|
||||||
|
|
||||||
|
This is the purpose of _multi-fields_. Most datatypes support multi-fields
|
||||||
|
via the <<multi-fields>> parameter.
|
||||||
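For example, a `city` field could be indexed as an `analyzed` `string` for
full-text search, with a `not_analyzed` `raw` sub-field for sorting and
aggregations. The mapping below is only a sketch; the index, type, and field
names are hypothetical:

[source,js]
--------------------------------------------------
PUT my_index
{
  "mappings": {
    "my_type": {
      "properties": {
        "city": {
          "type": "string", <1>
          "fields": {
            "raw": { <2>
              "type": "string",
              "index": "not_analyzed"
            }
          }
        }
      }
    }
  }
}
--------------------------------------------------
// AUTOSENSE
<1> The main `city` field is an `analyzed` string, used for full-text search.
<2> The `city.raw` sub-field is `not_analyzed`, suitable for sorting and aggregations.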
|
|
||||||
[float]
|
[float]
|
||||||
[[mapping-settings]]
|
== Dynamic mapping
|
||||||
=== Global Settings
|
|
||||||
|
Fields and mapping types do not need to be defined before being used. Thanks
|
||||||
|
to _dynamic mapping_, new mapping types and new field names will be added
|
||||||
|
automatically, just by indexing a document. New fields can be added both to
|
||||||
|
the top-level mapping type, and to inner <<object,`object`>> and
|
||||||
|
<<nested,`nested`>> fields.
|
||||||
|
|
||||||
|
The
|
||||||
|
<<dynamic-mapping,dynamic mapping>> rules can be configured to
|
||||||
|
customise the mapping that is used for new types and new fields.
|
||||||
|
|
||||||
|
[float]
|
||||||
|
== Explicit mappings
|
||||||
|
|
||||||
|
You know more about your data than Elasticsearch can guess, so while dynamic
|
||||||
|
mapping can be useful to get started, at some point you will want to specify
|
||||||
|
your own explicit mappings.
|
||||||
|
|
||||||
|
You can create mapping types and field mappings when you
|
||||||
|
<<indices-create-index,create an index>>, and you can add mapping types and
|
||||||
|
fields to an existing index with the <<indices-put-mapping,PUT mapping API>>.
|
||||||
|
|
||||||
|
[float]
|
||||||
|
== Updating existing mappings
|
||||||
|
|
||||||
|
Other than where documented, *existing type and field mappings cannot be
|
||||||
|
updated*. Changing the mapping would mean invalidating already indexed
|
||||||
|
documents. Instead, you should create a new index with the correct mappings
|
||||||
|
and reindex your data into that index.
|
||||||
|
|
||||||
|
[[field-conflicts]]
|
||||||
|
[float]
|
||||||
|
== Fields are shared across mapping types
|
||||||
|
|
||||||
|
Mapping types are used to group fields, but the fields in each mapping type
|
||||||
|
are not independent of each other. Fields with:
|
||||||
|
|
||||||
|
* the _same name_
|
||||||
|
* in the _same index_
|
||||||
|
* in _different mapping types_
|
||||||
|
* map to the _same field_ internally,
|
||||||
|
* and *must have the same mapping*.
|
||||||
|
|
||||||
|
If a `title` field exists in both the `user` and `blogpost` mapping types, the
|
||||||
|
`title` fields must have exactly the same mapping in each type. The only
|
||||||
|
exceptions to this rule are the <<copy-to>>, <<dynamic>>, <<enabled>>,
|
||||||
|
<<ignore-above>>, <<include-in-all>>, and <<properties>> parameters, which may
|
||||||
|
have different settings per field.
|
||||||
|
|
||||||
|
Usually, fields with the same name also contain the same type of data, so
|
||||||
|
having the same mapping is not a problem. When conflicts do arise, these can
|
||||||
|
be solved by choosing more descriptive names, such as `user_title` and
|
||||||
|
`blog_title`.
|
||||||
|
|
||||||
|
[float]
|
||||||
|
== Example mapping
|
||||||
|
|
||||||
|
A mapping for the example described above could be specified when creating the
|
||||||
|
index, as follows:
|
||||||
|
|
||||||
|
[source,js]
|
||||||
|
---------------------------------------
|
||||||
|
PUT my_index <1>
|
||||||
|
{
|
||||||
|
"mappings": {
|
||||||
|
"user": { <2>
|
||||||
|
"_all": { "enabled": false }, <3>
|
||||||
|
"properties": { <4>
|
||||||
|
"title": { "type": "string" }, <5>
|
||||||
|
"name": { "type": "string" }, <5>
|
||||||
|
"age": { "type": "integer" } <5>
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"blogpost": { <2>
|
||||||
|
"properties": { <4>
|
||||||
|
"title": { "type": "string" }, <5>
|
||||||
|
"body": { "type": "string" }, <5>
|
||||||
|
"user_id": {
|
||||||
|
"type": "string", <5>
|
||||||
|
"index": "not_analyzed"
|
||||||
|
},
|
||||||
|
"created": {
|
||||||
|
"type": "date", <5>
|
||||||
|
"format": "strict_date_optional_time||epoch_millis"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
---------------------------------------
|
||||||
|
// AUTOSENSE
|
||||||
|
<1> Create an index called `my_index`.
|
||||||
|
<2> Add mapping types called `user` and `blogpost`.
|
||||||
|
<3> Disable the `_all` <<mapping-fields,meta field>> for the `user` mapping type.
|
||||||
|
<4> Specify fields or _properties_ in each mapping type.
|
||||||
|
<5> Specify the data `type` and mapping for each field.
|
||||||
|
|
||||||
The `index.mapping.ignore_malformed` global setting can be set at the
|
|
||||||
index level to ignore malformed content globally across all
|
|
||||||
mapping types (an example of malformed content is trying to index a text string
|
|
||||||
value as a numeric type).
|
|
||||||
|
|
||||||
The `index.mapping.coerce` global setting can be set at the
|
|
||||||
index level to coerce numeric content globally across all
|
|
||||||
mapping types. The default setting is `true`; the coercions attempted are
|
|
||||||
converting strings containing numbers into numeric types, and truncating numeric values
|
|
||||||
with fractions to integer/short/long values by dropping the fraction part.
|
|
||||||
When the permitted conversions fail, the value is considered
|
|
||||||
malformed and the `ignore_malformed` setting dictates what happens next.
|
|
||||||
--
|
--
|
||||||
|
|
||||||
include::mapping/fields.asciidoc[]
|
|
||||||
|
|
||||||
include::mapping/types.asciidoc[]
|
include::mapping/types.asciidoc[]
|
||||||
|
|
||||||
include::mapping/date-format.asciidoc[]
|
include::mapping/fields.asciidoc[]
|
||||||
|
|
||||||
include::mapping/fielddata_formats.asciidoc[]
|
include::mapping/params.asciidoc[]
|
||||||
|
|
||||||
include::mapping/dynamic-mapping.asciidoc[]
|
include::mapping/dynamic-mapping.asciidoc[]
|
||||||
|
|
||||||
include::mapping/meta.asciidoc[]
|
|
||||||
|
|
||||||
include::mapping/transform.asciidoc[]
|
|
||||||
|
|
|
@ -1,238 +0,0 @@
|
||||||
[[mapping-date-format]]
|
|
||||||
== Date Format
|
|
||||||
|
|
||||||
In JSON documents, dates are represented as strings. Elasticsearch uses a set
|
|
||||||
of pre-configured format to recognize and convert those, but you can change the
|
|
||||||
defaults by specifying the `format` option when defining a `date` type, or by
|
|
||||||
specifying `dynamic_date_formats` in the `root object` mapping (which will
|
|
||||||
be used unless explicitly overridden by a `date` type). There are built in
|
|
||||||
formats supported, as well as complete custom one.
|
|
||||||
|
|
||||||
The parsing of dates uses http://www.joda.org/joda-time/[Joda]. The
|
|
||||||
default date parsing used if no format is specified is
|
|
||||||
http://www.joda.org/joda-time/apidocs/org/joda/time/format/ISODateTimeFormat.html#dateOptionalTimeParser--[ISODateTimeFormat.dateOptionalTimeParser].
|
|
||||||
|
|
||||||
An extension to the format allow to define several formats using `||`
|
|
||||||
separator. This allows to define less strict formats that can be used,
|
|
||||||
for example, the `yyyy/MM/dd HH:mm:ss||yyyy/MM/dd` format will parse
|
|
||||||
both `yyyy/MM/dd HH:mm:ss` and `yyyy/MM/dd`. The first format will also
|
|
||||||
act as the one that converts back from milliseconds to a string
|
|
||||||
representation.
|
|
||||||
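For example, a `date` field could be mapped to accept both of these formats.
The snippet below is only a sketch; the index, type, and field names are
hypothetical:

[source,js]
--------------------------------------------------
PUT my_index
{
  "mappings": {
    "my_type": {
      "properties": {
        "created": {
          "type": "date",
          "format": "yyyy/MM/dd HH:mm:ss||yyyy/MM/dd"
        }
      }
    }
  }
}
--------------------------------------------------
// AUTOSENSE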
|
|
||||||
[float]
|
|
||||||
[[date-math]]
|
|
||||||
=== Date Math
|
|
||||||
|
|
||||||
The `date` type supports using date math expression when using it in a
|
|
||||||
query/filter (mainly makes sense in `range` query/filter).
|
|
||||||
|
|
||||||
The expression starts with an "anchor" date, which can be either `now`
|
|
||||||
or a date string (in the applicable format) ending with `||`. It can
|
|
||||||
then follow by a math expression, supporting `+`, `-` and `/`
|
|
||||||
(rounding). The units supported are `y` (year), `M` (month), `w` (week),
|
|
||||||
`d` (day), `h` (hour), `m` (minute), and `s` (second).
|
|
||||||
|
|
||||||
Here are some samples: `now+1h`, `now+1h+1m`, `now+1h/d`,
|
|
||||||
`2012-01-01||+1M/d`.
|
|
||||||
|
|
||||||
When doing `range` type searches with rounding, the value parsed
|
|
||||||
depends on whether the end of the range is inclusive or exclusive, and
|
|
||||||
whether it is the beginning or the end of the range. Rounding up moves to the
|
|
||||||
last millisecond of the rounding scope, and rounding down to the
|
|
||||||
first millisecond of the rounding scope. The semantics work as follows:
|
|
||||||
* `gt` - round up, and use > that value (`2014-11-18||/M` becomes `2014-11-30T23:59:59.999`, ie excluding the entire month)
|
|
||||||
* `gte` - round down, and use >= that value (`2014-11-18||/M` becomes `2014-11-01`, ie including the entire month)
|
|
||||||
* `lt` - round down, and use < that value (`2014-11-18||/M` becomes `2014-11-01`, ie excluding the entire month)
|
|
||||||
* `lte` - round up, and use <= that value (`2014-11-18||/M` becomes `2014-11-30T23:59:59.999`, ie including the entire month)
|
|
||||||
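For example, a `range` query can combine date math and rounding. The query
below is only a sketch; the index and field names are hypothetical:

[source,js]
--------------------------------------------------
GET my_index/_search
{
  "query": {
    "range": {
      "created": {
        "gte": "2014-11-18||/M", <1>
        "lt": "now/d" <2>
      }
    }
  }
}
--------------------------------------------------
// AUTOSENSE
<1> Rounded down to the first millisecond of November 2014.
<2> Rounded down to the first millisecond of the current day.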
|
|
||||||
[float]
|
|
||||||
[[built-in]]
|
|
||||||
=== Built In Formats
|
|
||||||
|
|
||||||
Most of the below date formats have a `strict` companion format, which means that
|
|
||||||
the year, month and day parts must have leading zeros in order
|
|
||||||
to be valid. This means that a date like `5/11/1` would not be valid, but
|
|
||||||
you would need to specify the full date, which would be `2005/11/01` in this
|
|
||||||
example. So instead of `date_optional_time` you would need to specify
|
|
||||||
`strict_date_optional_time`.
|
|
||||||
|
|
||||||
The following table lists all the default ISO formats supported:
|
|
||||||
|
|
||||||
[cols="<,<",options="header",]
|
|
||||||
|=======================================================================
|
|
||||||
|Name |Description
|
|
||||||
|`basic_date`|A basic formatter for a full date as four digit year, two
|
|
||||||
digit month of year, and two digit day of month (yyyyMMdd).
|
|
||||||
|
|
||||||
|`basic_date_time`|A basic formatter that combines a basic date and time,
|
|
||||||
separated by a 'T' (yyyyMMdd'T'HHmmss.SSSZ).
|
|
||||||
|
|
||||||
|`basic_date_time_no_millis`|A basic formatter that combines a basic date
|
|
||||||
and time without millis, separated by a 'T' (yyyyMMdd'T'HHmmssZ).
|
|
||||||
|
|
||||||
|`basic_ordinal_date`|A formatter for a full ordinal date, using a four
|
|
||||||
digit year and three digit dayOfYear (yyyyDDD).
|
|
||||||
|
|
||||||
|`basic_ordinal_date_time`|A formatter for a full ordinal date and time,
|
|
||||||
using a four digit year and three digit dayOfYear
|
|
||||||
(yyyyDDD'T'HHmmss.SSSZ).
|
|
||||||
|
|
||||||
|`basic_ordinal_date_time_no_millis`|A formatter for a full ordinal date
|
|
||||||
and time without millis, using a four digit year and three digit
|
|
||||||
dayOfYear (yyyyDDD'T'HHmmssZ).
|
|
||||||
|
|
||||||
|`basic_time`|A basic formatter for a two digit hour of day, two digit
|
|
||||||
minute of hour, two digit second of minute, three digit millis, and time
|
|
||||||
zone offset (HHmmss.SSSZ).
|
|
||||||
|
|
||||||
|`basic_time_no_millis`|A basic formatter for a two digit hour of day,
|
|
||||||
two digit minute of hour, two digit second of minute, and time zone
|
|
||||||
offset (HHmmssZ).
|
|
||||||
|
|
||||||
|`basic_t_time`|A basic formatter for a two digit hour of day, two digit
|
|
||||||
minute of hour, two digit second of minute, three digit millis, and time
|
|
||||||
zone off set prefixed by 'T' ('T'HHmmss.SSSZ).
|
|
||||||
|
|
||||||
|`basic_t_time_no_millis`|A basic formatter for a two digit hour of day,
|
|
||||||
two digit minute of hour, two digit second of minute, and time zone
|
|
||||||
offset prefixed by 'T' ('T'HHmmssZ).
|
|
||||||
|
|
||||||
|`basic_week_date`|A basic formatter for a full date as four digit
|
|
||||||
weekyear, two digit week of weekyear, and one digit day of week
|
|
||||||
(xxxx'W'wwe). `strict_basic_week_date` is supported.
|
|
||||||
|
|
||||||
|`basic_week_date_time`|A basic formatter that combines a basic weekyear
|
|
||||||
date and time, separated by a 'T' (xxxx'W'wwe'T'HHmmss.SSSZ).
|
|
||||||
`strict_basic_week_date_time` is supported.
|
|
||||||
|
|
||||||
|`basic_week_date_time_no_millis`|A basic formatter that combines a basic
|
|
||||||
weekyear date and time without millis, separated by a 'T'
|
|
||||||
(xxxx'W'wwe'T'HHmmssZ). `strict_basic_week_date_time_no_millis` is supported.
|
|
||||||
|
|
||||||
|`date`|A formatter for a full date as four digit year, two digit month
|
|
||||||
of year, and two digit day of month (yyyy-MM-dd). `strict_date` is supported.
|
|
||||||
|
|
||||||
|`date_hour`|A formatter that combines a full date and two digit hour of
|
|
||||||
day. `strict_date_hour` is supported.
|
|
||||||
|
|
||||||
|
|
||||||
|`date_hour_minute`|A formatter that combines a full date, two digit hour
|
|
||||||
of day, and two digit minute of hour. `strict_date_hour_minute` is supported.
|
|
||||||
|
|
||||||
|`date_hour_minute_second`|A formatter that combines a full date, two
|
|
||||||
digit hour of day, two digit minute of hour, and two digit second of
|
|
||||||
minute. `strict_date_hour_minute_second` is supported.
|
|
||||||
|
|
||||||
|`date_hour_minute_second_fraction`|A formatter that combines a full
|
|
||||||
date, two digit hour of day, two digit minute of hour, two digit second
|
|
||||||
of minute, and three digit fraction of second
|
|
||||||
(yyyy-MM-dd'T'HH:mm:ss.SSS). `strict_date_hour_minute_second_fraction` is supported.
|
|
||||||
|
|
||||||
|`date_hour_minute_second_millis`|A formatter that combines a full date,
|
|
||||||
two digit hour of day, two digit minute of hour, two digit second of
|
|
||||||
minute, and three digit fraction of second (yyyy-MM-dd'T'HH:mm:ss.SSS).
|
|
||||||
`strict_date_hour_minute_second_millis` is supported.
|
|
||||||
|
|
||||||
|`date_optional_time`|a generic ISO datetime parser where the date is
|
|
||||||
mandatory and the time is optional. `strict_date_optional_time` is supported.
|
|
||||||
|
|
||||||
|`date_time`|A formatter that combines a full date and time, separated by
|
|
||||||
a 'T' (yyyy-MM-dd'T'HH:mm:ss.SSSZZ). `strict_date_time` is supported.
|
|
||||||
|
|
||||||
|`date_time_no_millis`|A formatter that combines a full date and time
|
|
||||||
without millis, separated by a 'T' (yyyy-MM-dd'T'HH:mm:ssZZ).
|
|
||||||
`strict_date_time_no_millis` is supported.
|
|
||||||
|
|
||||||
|`hour`|A formatter for a two digit hour of day. `strict_hour` is supported.
|
|
||||||
|
|
||||||
|`hour_minute`|A formatter for a two digit hour of day and two digit
|
|
||||||
minute of hour. `strict_hour_minute` is supported.
|
|
||||||
|
|
||||||
|`hour_minute_second`|A formatter for a two digit hour of day, two digit
|
|
||||||
minute of hour, and two digit second of minute.
|
|
||||||
`strict_hour_minute_second` is supported.
|
|
||||||
|
|
||||||
|`hour_minute_second_fraction`|A formatter for a two digit hour of day,
|
|
||||||
two digit minute of hour, two digit second of minute, and three digit
|
|
||||||
fraction of second (HH:mm:ss.SSS).
|
|
||||||
`strict_hour_minute_second_fraction` is supported.
|
|
||||||
|
|
||||||
|`hour_minute_second_millis`|A formatter for a two digit hour of day, two
|
|
||||||
digit minute of hour, two digit second of minute, and three digit
|
|
||||||
fraction of second (HH:mm:ss.SSS).
|
|
||||||
`strict_hour_minute_second_millis` is supported.
|
|
||||||
|
|
||||||
|`ordinal_date`|A formatter for a full ordinal date, using a four digit
|
|
||||||
year and three digit dayOfYear (yyyy-DDD). `strict_ordinal_date` is supported.
|
|
||||||
|
|
||||||
|`ordinal_date_time`|A formatter for a full ordinal date and time, using
|
|
||||||
a four digit year and three digit dayOfYear (yyyy-DDD'T'HH:mm:ss.SSSZZ).
|
|
||||||
`strict_ordinal_date_time` is supported.
|
|
||||||
|
|
||||||
|`ordinal_date_time_no_millis`|A formatter for a full ordinal date and
|
|
||||||
time without millis, using a four digit year and three digit dayOfYear
|
|
||||||
(yyyy-DDD'T'HH:mm:ssZZ).
|
|
||||||
`strict_ordinal_date_time_no_millis` is supported.
|
|
||||||
|
|
||||||
|`time`|A formatter for a two digit hour of day, two digit minute of
|
|
||||||
hour, two digit second of minute, three digit fraction of second, and
|
|
||||||
time zone offset (HH:mm:ss.SSSZZ). `strict_time` is supported.
|
|
||||||
|
|
||||||
|`time_no_millis`|A formatter for a two digit hour of day, two digit
|
|
||||||
minute of hour, two digit second of minute, and time zone offset
|
|
||||||
(HH:mm:ssZZ). `strict_time_no_millis` is supported.
|
|
||||||
|
|
||||||
|`t_time`|A formatter for a two digit hour of day, two digit minute of
|
|
||||||
hour, two digit second of minute, three digit fraction of second, and
|
|
||||||
time zone offset prefixed by 'T' ('T'HH:mm:ss.SSSZZ).
|
|
||||||
`strict_t_time` is supported.
|
|
||||||
|
|
||||||
|`t_time_no_millis`|A formatter for a two digit hour of day, two digit
|
|
||||||
minute of hour, two digit second of minute, and time zone offset
|
|
||||||
prefixed by 'T' ('T'HH:mm:ssZZ). `strict_t_time_no_millis` is supported.
|
|
||||||
|
|
||||||
|`week_date`|A formatter for a full date as four digit weekyear, two
|
|
||||||
digit week of weekyear, and one digit day of week (xxxx-'W'ww-e).
|
|
||||||
`strict_week_date` is supported.
|
|
||||||
|
|
||||||
|`week_date_time`|A formatter that combines a full weekyear date and
|
|
||||||
time, separated by a 'T' (xxxx-'W'ww-e'T'HH:mm:ss.SSSZZ).
|
|
||||||
`strict_week_date_time` is supported.
|
|
||||||
|
|
||||||
|`week_date_time_no_millis`|A formatter that combines a full weekyear date
|
|
||||||
and time without millis, separated by a 'T' (xxxx-'W'ww-e'T'HH:mm:ssZZ).
|
|
||||||
`strict_week_date_time_no_millis` is supported.
|
|
||||||
|
|
||||||
|`weekyear`|A formatter for a four digit weekyear. `strict_weekyear` is supported.
|
|
||||||
|
|
||||||
|`weekyear_week`|A formatter for a four digit weekyear and two digit week
|
|
||||||
of weekyear. `strict_weekyear_week` is supported.
|
|
||||||
|
|
||||||
|`weekyear_week_day`|A formatter for a four digit weekyear, two digit week
|
|
||||||
of weekyear, and one digit day of week. `strict_weekyear_week_day` is supported.
|
|
||||||
|
|
||||||
|`year`|A formatter for a four digit year. `strict_year` is supported.
|
|
||||||
|
|
||||||
|`year_month`|A formatter for a four digit year and two digit month of
|
|
||||||
year. `strict_year_month` is supported.
|
|
||||||
|
|
||||||
|`year_month_day`|A formatter for a four digit year, two digit month of
|
|
||||||
year, and two digit day of month. `strict_year_month_day` is supported.
|
|
||||||
|
|
||||||
|`epoch_second`|A formatter for the number of seconds since the epoch.
|
|
||||||
Note that this timestamp is limited to 10 characters, so only dates
|
|
||||||
between the years 1653 and 2286 are supported. You should use a different
|
|
||||||
date formatter for dates outside this range.
|
|
||||||
|
|
||||||
|`epoch_millis`|A formatter for the number of milliseconds since the epoch.
|
|
||||||
Note that this timestamp is limited to 13 characters, so only dates
|
|
||||||
between the years 1653 and 2286 are supported. You should use a different
|
|
||||||
date formatter for dates outside this range.
|
|
||||||
|=======================================================================
|
|
||||||
|
|
||||||
[float]
|
|
||||||
[[custom]]
|
|
||||||
=== Custom Format
|
|
||||||
|
|
||||||
Allows for a completely customizable date format explained
|
|
||||||
http://www.joda.org/joda-time/apidocs/org/joda/time/format/DateTimeFormat.html[here].
|
|
|
@ -1,73 +1,67 @@
|
||||||
[[mapping-dynamic-mapping]]
|
[[dynamic-mapping]]
|
||||||
== Dynamic Mapping
|
== Dynamic Mapping
|
||||||
|
|
||||||
Default mappings allow generic mapping definitions to be automatically applied
|
One of the most important features of Elasticsearch is that it tries to get
|
||||||
to types that do not have mappings predefined. This is mainly done
|
out of your way and let you start exploring your data as quickly as possible.
|
||||||
thanks to the fact that the
|
To index a document, you don't have to first create an index, define a mapping
|
||||||
<<mapping-object-type,object mapping>> and
|
type, and define your fields -- you can just index a document and the index,
|
||||||
namely the <<mapping-root-object-type,root
|
type, and fields will spring to life automatically:
|
||||||
object mapping>> allow for schema-less dynamic addition of unmapped
|
|
||||||
fields.
|
|
||||||
|
|
||||||
The default mapping definition is a plain mapping definition that is
|
|
||||||
embedded within the distribution:
|
|
||||||
|
|
||||||
[source,js]
|
[source,js]
|
||||||
--------------------------------------------------
|
--------------------------------------------------
|
||||||
{
|
PUT data/counters/1 <1>
|
||||||
"_default_" : {
|
{ "count": 5 }
|
||||||
}
|
|
||||||
}
|
|
||||||
--------------------------------------------------
|
--------------------------------------------------
|
||||||
|
// AUTOSENSE
|
||||||
|
<1> Creates the `data` index, the `counters` mapping type, and a field
|
||||||
|
called `count` with datatype `long`.
|
||||||
|
|
||||||
Pretty short, isn't it? Basically, everything is defaulted, especially the
|
The automatic detection and addition of new types and fields is called
|
||||||
dynamic nature of the root object mapping. The default mapping can be
|
_dynamic mapping_. The dynamic mapping rules can be customised to suit your
|
||||||
overridden by specifying the `_default_` type when creating a new index.
|
purposes with:
|
||||||
|
|
||||||
The dynamic creation of mappings for unmapped types can be completely
|
<<default-mapping,`_default_` mapping>>::
|
||||||
disabled by setting `index.mapper.dynamic` to `false`.
|
|
||||||
|
|
||||||
The dynamic creation of fields within a type can be completely
|
Configure the base mapping to be used for new mapping types.
|
||||||
disabled by setting the `dynamic` property of the type to `strict`.
|
|
||||||
|
|
||||||
Here is a <<indices-put-mapping,Put Mapping>> example that
|
<<dynamic-field-mapping,Dynamic field mappings>>::
|
||||||
disables dynamic field creation for a `tweet`:
|
|
||||||
|
|
||||||
[source,js]
|
The rules governing dynamic field detection.
|
||||||
--------------------------------------------------
|
|
||||||
$ curl -XPUT 'http://localhost:9200/twitter/_mapping/tweet' -d '
|
|
||||||
{
|
|
||||||
"tweet" : {
|
|
||||||
"dynamic": "strict",
|
|
||||||
"properties" : {
|
|
||||||
"message" : {"type" : "string", "store" : true }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
'
|
|
||||||
--------------------------------------------------
|
|
||||||
|
|
||||||
Here is how we can change the default
|
<<dynamic-templates,Dynamic templates>>::
|
||||||
<<mapping-date-format,date_formats>> used in the
|
|
||||||
root and inner object types:
|
Custom rules to configure the mapping for dynamically added fields.
|
||||||
|
|
||||||
|
TIP: <<indices-templates,Index templates>> allow you to configure the default
|
||||||
|
mappings, settings, aliases, and warmers for new indices, whether created
|
||||||
|
automatically or explicitly.
|
||||||
|
|
||||||
[source,js]
|
|
||||||
--------------------------------------------------
|
|
||||||
{
|
|
||||||
"_default_" : {
|
|
||||||
"dynamic_date_formats" : ["yyyy-MM-dd", "dd-MM-yyyy", "date_optional_time"]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
--------------------------------------------------
|
|
||||||
|
|
||||||
[float]
|
[float]
|
||||||
=== Unmapped fields in queries
|
=== Disabling automatic type creation
|
||||||
|
|
||||||
Queries and filters can refer to fields that don't exist in a mapping. Whether this
|
Automatic type creation can be disabled by setting the `index.mapper.dynamic`
|
||||||
is allowed is controlled by the `index.query.parse.allow_unmapped_fields` setting.
|
setting to `false`, either by setting the default value in the
|
||||||
This setting defaults to `true`. Setting it to `false` will disallow the usage of
|
`config/elasticsearch.yml` file, or per-index as an index setting:
|
||||||
unmapped fields in queries.
|
|
||||||
|
[source,js]
|
||||||
|
--------------------------------------------------
|
||||||
|
PUT /_settings <1>
|
||||||
|
{
|
||||||
|
"index.mapper.dynamic":false
|
||||||
|
}
|
||||||
|
--------------------------------------------------
|
||||||
|
// AUTOSENSE
|
||||||
|
<1> Disable automatic type creation for all indices.
|
||||||
|
|
||||||
|
Regardless of the value of this setting, types can still be added explicitly
|
||||||
|
when <<indices-create-index,creating an index>> or with the
|
||||||
|
<<indices-put-mapping,PUT mapping>> API.
|
||||||
|
|
||||||
|
|
||||||
|
include::dynamic/default-mapping.asciidoc[]
|
||||||
|
|
||||||
|
include::dynamic/field-mapping.asciidoc[]
|
||||||
|
|
||||||
|
include::dynamic/templates.asciidoc[]
|
||||||
|
|
||||||
When registering a new <<search-percolate,percolator query>> or creating
|
|
||||||
a <<filtered,filtered alias>> then the `index.query.parse.allow_unmapped_fields` setting
|
|
||||||
is forcefully overwritten to disallow unmapped fields.
|
|
|
@ -0,0 +1,82 @@
|
||||||
|
[[default-mapping]]
|
||||||
|
=== `_default_` mapping
|
||||||
|
|
||||||
|
The default mapping, which will be used as the base mapping for any new
|
||||||
|
mapping types, can be customised by adding a mapping type with the name
|
||||||
|
`_default_` to an index, either when
|
||||||
|
<<indices-create-index,creating the index>> or later on with the
|
||||||
|
<<indices-put-mapping,PUT mapping>> API.
|
||||||
|
|
||||||
|
|
||||||
|
[source,js]
|
||||||
|
--------------------------------------------------
|
||||||
|
PUT my_index
|
||||||
|
{
|
||||||
|
"mappings": {
|
||||||
|
"_default_": { <1>
|
||||||
|
"_all": {
|
||||||
|
"enabled": false
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"user": {}, <2>
|
||||||
|
"blogpost": { <3>
|
||||||
|
"_all": {
|
||||||
|
"enabled": true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
--------------------------------------------------
|
||||||
|
// AUTOSENSE
|
||||||
|
<1> The `_default_` mapping defaults the <<mapping-all-field,`_all`>> field to disabled.
|
||||||
|
<2> The `user` type inherits the settings from `_default_`.
|
||||||
|
<3> The `blogpost` type overrides the defaults and enables the <<mapping-all-field,`_all`>> field.
|
||||||
|
|
||||||
|
While the `_default_` mapping can be updated after an index has been created,
|
||||||
|
the new defaults will only affect mapping types that are created afterwards.
|
||||||
|
|
||||||
|
The `_default_` mapping can be used in conjunction with
|
||||||
|
<<indices-templates,Index templates>> to control dynamically created types
|
||||||
|
within automatically created indices:
|
||||||
|
|
||||||
|
|
||||||
|
[source,js]
|
||||||
|
--------------------------------------------------
|
||||||
|
PUT _template/logging
|
||||||
|
{
|
||||||
|
"template": "logs-*", <1>
|
||||||
|
"settings": { "number_of_shards": 1 }, <2>
|
||||||
|
"mappings": {
|
||||||
|
"_default_": {
|
||||||
|
"_all": { <3>
|
||||||
|
"enabled": false
|
||||||
|
},
|
||||||
|
"dynamic_templates": [
|
||||||
|
{
|
||||||
|
"strings": { <4>
|
||||||
|
"match_mapping_type": "string",
|
||||||
|
"mapping": {
|
||||||
|
"type": "string",
|
||||||
|
"fields": {
|
||||||
|
"raw": {
|
||||||
|
"type": "string",
|
||||||
|
"index": "not_analyzed",
|
||||||
|
"ignore_above": 256
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
PUT logs-2015.10.01/event/1
|
||||||
|
{ "message": "error:16" }
|
||||||
|
--------------------------------------------------
|
||||||
|
// AUTOSENSE
|
||||||
|
<1> The `logging` template will match any indices beginning with `logs-`.
|
||||||
|
<2> Matching indices will be created with a single primary shard.
|
||||||
|
<3> The `_all` field will be disabled by default for new type mappings.
|
||||||
|
<4> String fields will be created with an `analyzed` main field, and a `not_analyzed` `.raw` field.
|
|
@ -0,0 +1,139 @@
|
||||||
|
[[dynamic-field-mapping]]
|
||||||
|
=== Dynamic field mapping
|
||||||
|
|
||||||
|
By default, when a previously unseen field is found in a document,
|
||||||
|
Elasticsearch will add the new field to the type mapping. This behaviour can
|
||||||
|
be disabled, both at the document and at the <<object,`object`>> level, by
|
||||||
|
setting the <<dynamic,`dynamic`>> parameter to `false` or to `strict`.
|
||||||
|
|
||||||
|
Assuming `dynamic` field mapping is enabled, some simple rules are used to
|
||||||
|
determine which datatype the field should have:
|
||||||
|
|
||||||
|
[horizontal]
|
||||||
|
*JSON datatype*:: *Elasticsearch datatype*
|
||||||
|
|
||||||
|
`null`:: No field is added.
|
||||||
|
`true` or `false`:: <<boolean,`boolean`>> field
|
||||||
|
floating{nbsp}point{nbsp}number:: <<number,`double`>> field
|
||||||
|
integer:: <<number,`long`>> field
|
||||||
|
object:: <<object,`object`>> field
|
||||||
|
array:: Depends on the first non-`null` value in the array.
|
||||||
|
string:: Either a <<date,`date`>> field
|
||||||
|
(if the value passes <<date-detection,date detection>>),
|
||||||
|
a <<number,`double`>> or <<number,`long`>> field
|
||||||
|
(if the value passes <<numeric-detection,numeric detection>>)
|
||||||
|
or an <<mapping-index,`analyzed`>> <<string,`string`>> field.
|
||||||
|
|
||||||
|
These are the only <<mapping-types,field datatypes>> that are dynamically
|
||||||
|
detected. All other datatypes must be mapped explicitly.
|
||||||
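For example, indexing the document below would dynamically add four fields
according to these rules. This is only a sketch; the index, type, and field
names are hypothetical:

[source,js]
--------------------------------------------------
PUT my_index/my_type/1
{
  "count": 5, <1>
  "price": 10.5, <2>
  "active": true, <3>
  "message": "some text" <4>
}
--------------------------------------------------
// AUTOSENSE
<1> The `count` field is added as a `long` field.
<2> The `price` field is added as a `double` field.
<3> The `active` field is added as a `boolean` field.
<4> The `message` field is added as an `analyzed` `string` field, since it matches no date pattern and does not pass numeric detection.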
|
|
||||||
|
Besides the options listed below, dynamic field mapping rules can be further
|
||||||
|
customised with <<dynamic-templates,`dynamic_templates`>>.
|
||||||
|
|
||||||
|
[[date-detection]]
|
||||||
|
==== Date detection
|
||||||
|
|
||||||
|
If `date_detection` is enabled (default), then new string fields are checked
|
||||||
|
to see whether their contents match any of the date patterns specified in
|
||||||
|
`dynamic_date_formats`. If a match is found, a new <<date,`date`>> field is
|
||||||
|
added with the corresponding format.
|
||||||
|
|
||||||
|
The default value for `dynamic_date_formats` is:
|
||||||
|
|
||||||
|
[ <<strict-date-time,`"strict_date_optional_time"`>>, `"yyyy/MM/dd HH:mm:ss Z||yyyy/MM/dd Z"`]
|
||||||
|
|
||||||
|
For example:
|
||||||
|
|
||||||
|
|
||||||
|
[source,js]
|
||||||
|
--------------------------------------------------
|
||||||
|
PUT my_index/my_type/1
|
||||||
|
{
|
||||||
|
"create_date": "2015/09/02"
|
||||||
|
}
|
||||||
|
|
||||||
|
GET my_index/_mapping <1>
|
||||||
|
--------------------------------------------------
|
||||||
|
// AUTOSENSE
|
||||||
|
<1> The `create_date` field has been added as a <<date,`date`>>
|
||||||
|
field with the <<mapping-date-format,`format`>>: +
|
||||||
|
`"yyyy/MM/dd HH:mm:ss Z||yyyy/MM/dd Z"`.
|
||||||
|
|
||||||
|
===== Disabling date detection
|
||||||
|
|
||||||
|
Dynamic date detection can be disabled by setting `date_detection` to `false`:
|
||||||
|
|
||||||
|
[source,js]
|
||||||
|
--------------------------------------------------
|
||||||
|
PUT my_index
|
||||||
|
{
|
||||||
|
"mappings": {
|
||||||
|
"my_type": {
|
||||||
|
"date_detection": false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
PUT my_index/my_type/1 <1>
|
||||||
|
{
|
||||||
|
"create": "2015/09/02"
|
||||||
|
}
|
||||||
|
--------------------------------------------------
|
||||||
|
// AUTOSENSE
|
||||||
|
|
||||||
|
<1> The `create_date` field has been added as a <<string,`string`>> field.
|
||||||
|
|
||||||
|
===== Customising detected date formats
|
||||||
|
|
||||||
|
Alternatively, the `dynamic_date_formats` can be customised to support your
|
||||||
|
own <<mapping-date-format,date formats>>:
|
||||||
|
|
||||||
|
[source,js]
|
||||||
|
--------------------------------------------------
|
||||||
|
PUT my_index
|
||||||
|
{
|
||||||
|
"mappings": {
|
||||||
|
"my_type": {
|
||||||
|
"dynamic_date_formats": ["MM/dd/yyyy"]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
PUT my_index/my_type/1
|
||||||
|
{
|
||||||
|
"create_date": "09/25/2015"
|
||||||
|
}
|
||||||
|
--------------------------------------------------
|
||||||
|
// AUTOSENSE
|
||||||
|
|
||||||
|
|
||||||
|
[[numeric-detection]]
|
||||||
|
==== Numeric detection
|
||||||
|
|
||||||
|
While JSON has support for native floating point and integer datatypes, some
|
||||||
|
applications or languages may sometimes render numbers as strings. Usually the
|
||||||
|
correct solution is to map these fields explicitly, but numeric detection
|
||||||
|
(which is disabled by default) can be enabled to do this automatically:
|
||||||
|
|
||||||
|
|
||||||
|
[source,js]
|
||||||
|
--------------------------------------------------
|
||||||
|
PUT my_index
|
||||||
|
{
|
||||||
|
"mappings": {
|
||||||
|
"my_type": {
|
||||||
|
"numeric_detection": true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
PUT my_index/my_type/1
|
||||||
|
{
|
||||||
|
"my_float": "1.0", <1>
|
||||||
|
"my_integer": "1" <2>
|
||||||
|
}
|
||||||
|
--------------------------------------------------
|
||||||
|
// AUTOSENSE
|
||||||
|
<1> The `my_float` field is added as a <<number,`double`>> field.
|
||||||
|
<2> The `my_integer` field is added as a <<number,`long`>> field.
|
||||||
|
|
|
@ -0,0 +1,251 @@
|
||||||
|
[[dynamic-templates]]
|
||||||
|
=== Dynamic templates
|
||||||
|
|
||||||
|
Dynamic templates allow you to define custom mappings that can be applied to
|
||||||
|
dynamically added fields based on:
|
||||||
|
|
||||||
|
* the <<dynamic-mapping,datatype>> detected by Elasticsearch, with <<match-mapping-type,`match_mapping_type`>>.
|
||||||
|
* the name of the field, with <<match-unmatch,`match` and `unmatch`>> or <<match-pattern,`match_pattern`>>.
|
||||||
|
* the full dotted path to the field, with <<path-match-unmatch,`path_match` and `path_unmatch`>>.
|
||||||
|
|
||||||
|
The original field name `{name}` and the detected datatype
|
||||||
|
`{dynamic_type}` <<template-variables,template variables>> can be used in
|
||||||
|
the mapping specification as placeholders.
|
||||||
|
|
||||||
|
IMPORTANT: Dynamic field mappings are only added when a field contains a
|
||||||
|
concrete value -- not `null` or an empty array. This means that if the
|
||||||
|
`null_value` option is used in a `dynamic_template`, it will only be applied
|
||||||
|
after the first document with a concrete value for the field has been
|
||||||
|
indexed.
|
||||||
|
|
||||||
|
Dynamic templates are specified as an array of named objects:
|
||||||
|
|
||||||
|
[source,js]
|
||||||
|
--------------------------------------------------
|
||||||
|
"dynamic_templates": [
|
||||||
|
{
|
||||||
|
"my_template_name": { <1>
|
||||||
|
... match conditions ... <2>
|
||||||
|
"mapping": { ... } <3>
|
||||||
|
}
|
||||||
|
},
|
||||||
|
...
|
||||||
|
]
|
||||||
|
--------------------------------------------------
|
||||||
|
<1> The template name can be any string value.
|
||||||
|
<2> The match conditions can include any of: `match_mapping_type`, `match`, `match_pattern`, `unmatch`, `path_match`, `path_unmatch`.
|
||||||
|
<3> The mapping that the matched field should use.
|
||||||
|
|
||||||
|
|
||||||
|
Templates are processed in order -- the first matching template wins. New
|
||||||
|
templates can be appended to the end of the list with the
|
||||||
|
<<indices-put-mapping,PUT mapping>> API. If a new template has the same
|
||||||
|
name as an existing template, it will replace the old version.
|
||||||
|
|
||||||
|
[[match-mapping-type]]
|
||||||
|
==== `match_mapping_type`
|
||||||
|
|
||||||
|
The `match_mapping_type` matches on the datatype detected by
|
||||||
|
<<dynamic-field-mapping,dynamic field mapping>>, in other words, the datatype
|
||||||
|
that Elasticsearch thinks the field should have. Only the following datatypes
|
||||||
|
can be automatically detected: `boolean`, `date`, `double`, `long`, `object`,
|
||||||
|
`string`. It also accepts `*` to match all datatypes.
|
||||||
|
|
||||||
|
For example, if we wanted to map all integer fields as `integer` instead of
|
||||||
|
`long`, and all `string` fields as both `analyzed` and `not_analyzed`, we
|
||||||
|
could use the following templates:
|
||||||
|
|
||||||
|
[source,js]
|
||||||
|
--------------------------------------------------
|
||||||
|
PUT my_index
|
||||||
|
{
|
||||||
|
"mappings": {
|
||||||
|
"my_type": {
|
||||||
|
"dynamic_templates": [
|
||||||
|
{
|
||||||
|
"integers": {
|
||||||
|
"match_mapping_type": "long",
|
||||||
|
"mapping": {
|
||||||
|
"type": "integer"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"strings": {
|
||||||
|
"match_mapping_type": "string",
|
||||||
|
"mapping": {
|
||||||
|
"type": "string",
|
||||||
|
"fields": {
|
||||||
|
"raw": {
|
||||||
|
"type": "string",
|
||||||
|
"index": "not_analyzed",
|
||||||
|
"ignore_above": 256
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
PUT my_index/my_type/1
|
||||||
|
{
|
||||||
|
"my_integer": 5, <1>
|
||||||
|
"my_string": "Some string" <2>
|
||||||
|
}
|
||||||
|
|
||||||
|
--------------------------------------------------
|
||||||
|
// AUTOSENSE
|
||||||
|
<1> The `my_integer` field is mapped as an `integer`.
|
||||||
|
<2> The `my_string` field is mapped as an analyzed `string`, with a `not_analyzed` <<multi-fields,multi field>>.
|
||||||
|
|
||||||
|
|
||||||
|
[[match-unmatch]]
|
||||||
|
==== `match` and `unmatch`
|
||||||
|
|
||||||
|
The `match` parameter uses a pattern to match on the fieldname, while
|
||||||
|
`unmatch` uses a pattern to exclude fields matched by `match`.
|
||||||
|
|
||||||
|
The following example matches all `string` fields whose name starts with
|
||||||
|
`long_` (except for those which end with `_text`) and maps them as `long`
|
||||||
|
fields:
|
||||||
|
|
||||||
|
|
||||||
|
[source,js]
|
||||||
|
--------------------------------------------------
|
||||||
|
PUT my_index
|
||||||
|
{
|
||||||
|
"mappings": {
|
||||||
|
"my_type": {
|
||||||
|
"dynamic_templates": [
|
||||||
|
{
|
||||||
|
"longs_as_strings": {
|
||||||
|
"match_mapping_type": "string",
|
||||||
|
"match": "long_*",
|
||||||
|
"unmatch": "*_text",
|
||||||
|
"mapping": {
|
||||||
|
"type": "long"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
PUT my_index/my_type/1
|
||||||
|
{
|
||||||
|
"long_num": "5", <1>
|
||||||
|
"long_text": "foo" <2>
|
||||||
|
}
|
||||||
|
--------------------------------------------------
|
||||||
|
// AUTOSENSE
|
||||||
|
<1> The `long_num` field is mapped as a `long`.
|
||||||
|
<2> The `long_text` field uses the default `string` mapping.
|
||||||
|
|
||||||
|
[[match-pattern]]
|
||||||
|
==== `match_pattern`
|
||||||
|
|
||||||
|
The `match_pattern` parameter adjusts the behaviour of the `match` parameter
|
||||||
|
so that it supports full Java regular expression matching on the field name instead of
|
||||||
|
simple wildcards, for instance:
|
||||||
|
|
||||||
|
[source,js]
|
||||||
|
--------------------------------------------------
|
||||||
|
"match_pattern": "^profit_\d+$"
|
||||||
|
--------------------------------------------------
|
||||||
|
|
||||||
|
[[path-match-unmatch]]
|
||||||
|
==== `path_match` and `path_unmatch`
|
||||||
|
|
||||||
|
The `path_match` and `path_unmatch` parameters work in the same way as `match`
|
||||||
|
and `unmatch`, but operate on the full dotted path to the field, not just the
|
||||||
|
final name, e.g. `some_object.*.some_field`.
|
||||||
|
|
||||||
|
This example copies the values of any fields in the `name` object to the
|
||||||
|
top-level `full_name` field, except for the `middle` field:
|
||||||
|
|
||||||
|
[source,js]
|
||||||
|
--------------------------------------------------
|
||||||
|
PUT my_index
|
||||||
|
{
|
||||||
|
"mappings": {
|
||||||
|
"my_type": {
|
||||||
|
"dynamic_templates": [
|
||||||
|
{
|
||||||
|
"full_name": {
|
||||||
|
"path_match": "name.*",
|
||||||
|
"path_unmatch": "*.middle",
|
||||||
|
"mapping": {
|
||||||
|
"type": "string",
|
||||||
|
"copy_to": "full_name"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
PUT my_index/my_type/1
|
||||||
|
{
|
||||||
|
"name": {
|
||||||
|
"first": "Alice",
|
||||||
|
"middle": "Mary",
|
||||||
|
"last": "White"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
--------------------------------------------------
|
||||||
|
// AUTOSENSE
|
||||||
|
|
||||||
|
[[template-variables]]
|
||||||
|
==== `{name}` and `{dynamic_type}`
|
||||||
|
|
||||||
|
The `{name}` and `{dynamic_type}` placeholders are replaced in the `mapping`
|
||||||
|
with the field name and detected dynamic type. The following example sets all
|
||||||
|
string fields to use an <<analyzer,`analyzer`>> with the same name as the
|
||||||
|
field, and disables <<doc-values,`doc_values`>> for all non-string fields:
|
||||||
|
|
||||||
|
[source,js]
|
||||||
|
--------------------------------------------------
|
||||||
|
PUT my_index
|
||||||
|
{
|
||||||
|
"mappings": {
|
||||||
|
"my_type": {
|
||||||
|
"dynamic_templates": [
|
||||||
|
{
|
||||||
|
"named_analyzers": {
|
||||||
|
"match_mapping_type": "string",
|
||||||
|
"match": "*",
|
||||||
|
"mapping": {
|
||||||
|
"type": "string",
|
||||||
|
"analyzer": "{name}"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"no_doc_values": {
|
||||||
|
"match_mapping_type":"*",
|
||||||
|
"mapping": {
|
||||||
|
"type": "{dynamic_type}",
|
||||||
|
"doc_values": false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
PUT my_index/my_type/1
|
||||||
|
{
|
||||||
|
"english": "Some English text", <1>
|
||||||
|
"count": 5 <2>
|
||||||
|
}
|
||||||
|
--------------------------------------------------
|
||||||
|
// AUTOSENSE
|
||||||
|
<1> The `english` field is mapped as a `string` field with the `english` analyzer.
|
||||||
|
<2> The `count` field is mapped as a `long` field with `doc_values` disabled
|
||||||
|
|
|
@ -1,257 +0,0 @@
|
||||||
[[fielddata-formats]]
|
|
||||||
== Fielddata formats
|
|
||||||
|
|
||||||
The field data format controls how field data should be stored.
|
|
||||||
|
|
||||||
Depending on the field type, there might be several field data types
|
|
||||||
available. In particular, string, geo-point and numeric types support the `doc_values`
|
|
||||||
format which allows for computing the field data data-structures at indexing
|
|
||||||
time and storing them on disk. Although it will make the index larger and may
|
|
||||||
be slightly slower, this implementation will be more near-realtime-friendly
|
|
||||||
and will require much less memory from the JVM than other implementations.
|
|
||||||
|
|
||||||
Here is an example of how to configure the `tag` field to use the `paged_bytes` field
|
|
||||||
data format.
|
|
||||||
|
|
||||||
[source,js]
|
|
||||||
--------------------------------------------------
|
|
||||||
{
|
|
||||||
"tag": {
|
|
||||||
"type": "string",
|
|
||||||
"fielddata": {
|
|
||||||
"format": "paged_bytes"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
--------------------------------------------------
|
|
||||||
|
|
||||||
It is possible to change the field data format (and the field data settings
|
|
||||||
in general) on a live index by using the update mapping API.
|
|
||||||
|
|
||||||
[float]
|
|
||||||
=== String field data types
|
|
||||||
|
|
||||||
`paged_bytes` (default on analyzed string fields)::
|
|
||||||
Stores unique terms sequentially in a large buffer and maps documents to
|
|
||||||
the indices of the terms they contain in this large buffer.
|
|
||||||
|
|
||||||
`doc_values` (default when index is set to `not_analyzed`)::
|
|
||||||
Computes and stores field data data-structures on disk at indexing time.
|
|
||||||
Lowers memory usage but only works on non-analyzed strings (`index`: `no` or
|
|
||||||
`not_analyzed`).
|
|
||||||
|
|
||||||
[float]
|
|
||||||
=== Numeric field data types
|
|
||||||
|
|
||||||
`array`::
|
|
||||||
Stores field values in memory using arrays.
|
|
||||||
|
|
||||||
`doc_values` (default unless doc values are disabled)::
|
|
||||||
Computes and stores field data data-structures on disk at indexing time.
|
|
||||||
|
|
||||||
[float]
|
|
||||||
=== Geo point field data types
|
|
||||||
|
|
||||||
`array`::
|
|
||||||
Stores latitudes and longitudes in arrays.
|
|
||||||
|
|
||||||
`doc_values` (default unless doc values are disabled)::
|
|
||||||
Computes and stores field data data-structures on disk at indexing time.
|
|
||||||
|
|
||||||
[float]
|
|
||||||
[[global-ordinals]]
|
|
||||||
=== Global ordinals
|
|
||||||
|
|
||||||
Global ordinals is a data-structure on top of field data, that maintains an
|
|
||||||
incremental numbering for all the terms in field data in a lexicographic order.
|
|
||||||
Each term has a unique number and the number of term 'A' is lower than the number
|
|
||||||
of term 'B'. Global ordinals are only supported on string fields.
|
|
||||||
|
|
||||||
Field data on string also has ordinals, which is a unique numbering for all terms
|
|
||||||
in a particular segment and field. Global ordinals just build on top of this,
|
|
||||||
by providing a mapping between the segment ordinals and the global ordinals.
|
|
||||||
The latter being unique across the entire shard.
|
|
||||||
|
|
||||||
Global ordinals can improve the execution time of search features that already use
|
|
||||||
segment ordinals, such as the terms aggregator. Often these search features
|
|
||||||
need to merge the segment ordinal results into a cross-segment terms result. With
|
|
||||||
global ordinals this mapping happens during field data load time instead of during each
|
|
||||||
query execution. With global ordinals, search features only need to resolve the actual
|
|
||||||
terms when building the (shard) response; during execution there is no need
|
|
||||||
to use the actual terms at all, as the unique numbering that global ordinals provide is
|
|
||||||
sufficient, and this improves the execution time.
|
|
||||||
|
|
||||||
Global ordinals for a specified field are tied to all the segments of a shard (Lucene index),
|
|
||||||
which is different than for field data for a specific field which is tied to a single segment.
|
|
||||||
For this reason global ordinals need to be rebuilt in their entirety once new segments
|
|
||||||
become visible. This one time cost would happen anyway without global ordinals, but
|
|
||||||
then it would happen for each search execution instead!
|
|
||||||
|
|
||||||
The loading time of global ordinals depends on the number of terms in a field, but in general
|
|
||||||
it is low, since its source field data has already been loaded. The memory overhead of global
|
|
||||||
ordinals is small because it is very efficiently compressed. Eager loading of global ordinals
|
|
||||||
can move the loading time from the first search request, to the refresh itself.
|
|
||||||
|
|
||||||
[float]
|
|
||||||
[[fielddata-loading]]
|
|
||||||
=== Fielddata loading
|
|
||||||
|
|
||||||
By default, field data is loaded lazily, ie. the first time that a query that
|
|
||||||
requires them is executed. However, this can make the first requests that
|
|
||||||
follow a merge operation quite slow since fielddata loading is a heavy
|
|
||||||
operation.
|
|
||||||
|
|
||||||
It is possible to force field data to be loaded and cached eagerly through the
|
|
||||||
`loading` setting of fielddata:
|
|
||||||
|
|
||||||
[source,js]
|
|
||||||
--------------------------------------------------
|
|
||||||
{
|
|
||||||
"category": {
|
|
||||||
"type": "string",
|
|
||||||
"fielddata": {
|
|
||||||
"loading": "eager"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
--------------------------------------------------
|
|
||||||
|
|
||||||
Global ordinals can also be eagerly loaded:
|
|
||||||
|
|
||||||
[source,js]
|
|
||||||
--------------------------------------------------
|
|
||||||
{
|
|
||||||
"category": {
|
|
||||||
"type": "string",
|
|
||||||
"fielddata": {
|
|
||||||
"loading": "eager_global_ordinals"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
--------------------------------------------------
|
|
||||||
|
|
||||||
With the above setting both field data and global ordinals for a specific field
|
|
||||||
are eagerly loaded.
|
|
||||||
|
|
||||||
[float]
|
|
||||||
==== Disabling field data loading
|
|
||||||
|
|
||||||
Field data can take a lot of RAM so it makes sense to disable field data
|
|
||||||
loading on the fields that don't need field data, for example those that are
|
|
||||||
used for full-text search only. In order to disable field data loading, just
|
|
||||||
change the field data format to `disabled`. When disabled, all requests that
|
|
||||||
will try to load field data, e.g. when they include aggregations and/or sorting,
|
|
||||||
will return an error.
|
|
||||||
|
|
||||||
[source,js]
|
|
||||||
--------------------------------------------------
|
|
||||||
{
|
|
||||||
"text": {
|
|
||||||
"type": "string",
|
|
||||||
"fielddata": {
|
|
||||||
"format": "disabled"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
--------------------------------------------------
|
|
||||||
|
|
||||||
The `disabled` format is supported by all field types.
|
|
||||||
|
|
||||||
[float]
|
|
||||||
[[field-data-filtering]]
|
|
||||||
=== Filtering fielddata
|
|
||||||
|
|
||||||
It is possible to control which field values are loaded into memory,
|
|
||||||
which is particularly useful for string fields. When specifying the
|
|
||||||
<<mapping-core-types,mapping>> for a field, you
|
|
||||||
can also specify a fielddata filter.
|
|
||||||
|
|
||||||
Fielddata filters can be changed using the
|
|
||||||
<<indices-put-mapping,PUT mapping>>
|
|
||||||
API. After changing the filters, use the
|
|
||||||
<<indices-clearcache,Clear Cache>> API
|
|
||||||
to reload the fielddata using the new filters.
|
|
||||||
|
|
||||||
[float]
|
|
||||||
==== Filtering by frequency
|
|
||||||
|
|
||||||
The frequency filter allows you to only load terms whose frequency falls
|
|
||||||
between a `min` and `max` value, which can be expressed as an absolute
|
|
||||||
number (when the number is bigger than 1.0) or as a percentage
|
|
||||||
(eg `0.01` is `1%` and `1.0` is `100%`). Frequency is calculated
|
|
||||||
*per segment*. Percentages are based on the number of docs which have a
|
|
||||||
value for the field, as opposed to all docs in the segment.
|
|
||||||
|
|
||||||
Small segments can be excluded completely by specifying the minimum
|
|
||||||
number of docs that the segment should contain with `min_segment_size`:
|
|
||||||
|
|
||||||
[source,js]
|
|
||||||
--------------------------------------------------
|
|
||||||
{
|
|
||||||
"tag": {
|
|
||||||
"type": "string",
|
|
||||||
"fielddata": {
|
|
||||||
"filter": {
|
|
||||||
"frequency": {
|
|
||||||
"min": 0.001,
|
|
||||||
"max": 0.1,
|
|
||||||
"min_segment_size": 500
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
--------------------------------------------------
|
|
||||||
|
|
||||||
[float]
|
|
||||||
==== Filtering by regex
|
|
||||||
|
|
||||||
Terms can also be filtered by regular expression - only values which
|
|
||||||
match the regular expression are loaded. Note: the regular expression is
|
|
||||||
applied to each term in the field, not to the whole field value. For
|
|
||||||
instance, to only load hashtags from a tweet, we can use a regular
|
|
||||||
expression which matches terms beginning with `#`:
|
|
||||||
|
|
||||||
[source,js]
|
|
||||||
--------------------------------------------------
|
|
||||||
{
|
|
||||||
"tweet": {
|
|
||||||
"type": "string",
|
|
||||||
"analyzer": "whitespace"
|
|
||||||
"fielddata": {
|
|
||||||
"filter": {
|
|
||||||
"regex": {
|
|
||||||
"pattern": "^#.*"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
--------------------------------------------------
|
|
||||||
|
|
||||||
[float]
|
|
||||||
==== Combining filters
|
|
||||||
|
|
||||||
The `frequency` and `regex` filters can be combined:
|
|
||||||
|
|
||||||
[source,js]
|
|
||||||
--------------------------------------------------
|
|
||||||
{
|
|
||||||
"tweet": {
|
|
||||||
"type": "string",
|
|
||||||
"analyzer": "whitespace"
|
|
||||||
"fielddata": {
|
|
||||||
"filter": {
|
|
||||||
"regex": {
|
|
||||||
"pattern": "^#.*",
|
|
||||||
},
|
|
||||||
"frequency": {
|
|
||||||
"min": 0.001,
|
|
||||||
"max": 0.1,
|
|
||||||
"min_segment_size": 500
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
--------------------------------------------------
|
|
|
@ -5,7 +5,8 @@ Each document has metadata associated with it, such as the `_index`, mapping
|
||||||
<<mapping-type-field,`_type`>>, and `_id` meta-fields. The behaviour of some of these meta-fields
|
<<mapping-type-field,`_type`>>, and `_id` meta-fields. The behaviour of some of these meta-fields
|
||||||
can be customised when a mapping type is created.
|
can be customised when a mapping type is created.
|
||||||
|
|
||||||
The meta-fields are:
|
[float]
|
||||||
|
=== Identity meta-fields
|
||||||
|
|
||||||
[horizontal]
|
[horizontal]
|
||||||
<<mapping-index-field,`_index`>>::
|
<<mapping-index-field,`_index`>>::
|
||||||
|
@ -18,16 +19,26 @@ The meta-fields are:
|
||||||
|
|
||||||
<<mapping-type-field,`_type`>>::
|
<<mapping-type-field,`_type`>>::
|
||||||
|
|
||||||
The document's <<all-mapping-types,mapping type>>.
|
The document's <<mapping-type,mapping type>>.
|
||||||
|
|
||||||
<<mapping-id-field,`_id`>>::
|
<<mapping-id-field,`_id`>>::
|
||||||
|
|
||||||
The document's ID.
|
The document's ID.
|
||||||
|
|
||||||
|
[float]
|
||||||
|
=== Document source meta-fields
|
||||||
|
|
||||||
<<mapping-source-field,`_source`>>::
|
<<mapping-source-field,`_source`>>::
|
||||||
|
|
||||||
The original JSON representing the body of the document.
|
The original JSON representing the body of the document.
|
||||||
|
|
||||||
|
<<mapping-size-field,`_size`>>::
|
||||||
|
|
||||||
|
The size of the `_source` field in bytes.
|
||||||
|
|
||||||
|
[float]
|
||||||
|
=== Indexing meta-fields
|
||||||
|
|
||||||
<<mapping-all-field,`_all`>>::
|
<<mapping-all-field,`_all`>>::
|
||||||
|
|
||||||
A _catch-all_ field that indexes the values of all other fields.
|
A _catch-all_ field that indexes the values of all other fields.
|
||||||
|
@ -36,18 +47,6 @@ The meta-fields are:
|
||||||
|
|
||||||
All fields in the document which contain non-null values.
|
All fields in the document which contain non-null values.
|
||||||
|
|
||||||
<<mapping-parent-field,`_parent`>>::
|
|
||||||
|
|
||||||
Used to create a parent-child relationship between two mapping types.
|
|
||||||
|
|
||||||
<<mapping-routing-field,`_routing`>>::
|
|
||||||
|
|
||||||
A custom routing value which routes a document to a particular shard.
|
|
||||||
|
|
||||||
<<mapping-size-field,`_size`>>::
|
|
||||||
|
|
||||||
The size of the `_source` field in bytes.
|
|
||||||
|
|
||||||
<<mapping-timestamp-field,`_timestamp`>>::
|
<<mapping-timestamp-field,`_timestamp`>>::
|
||||||
|
|
||||||
A timestamp associated with the document, either specified manually or auto-generated.
|
A timestamp associated with the document, either specified manually or auto-generated.
|
||||||
|
@ -56,27 +55,49 @@ The meta-fields are:
|
||||||
|
|
||||||
How long a document should live before it is automatically deleted.
|
How long a document should live before it is automatically deleted.
|
||||||
|
|
||||||
include::fields/index-field.asciidoc[]
|
[float]
|
||||||
|
=== Routing meta-fields
|
||||||
|
|
||||||
include::fields/uid-field.asciidoc[]
|
<<mapping-parent-field,`_parent`>>::
|
||||||
|
|
||||||
include::fields/type-field.asciidoc[]
|
Used to create a parent-child relationship between two mapping types.
|
||||||
|
|
||||||
|
<<mapping-routing-field,`_routing`>>::
|
||||||
|
|
||||||
|
A custom routing value which routes a document to a particular shard.
|
||||||
|
|
||||||
|
[float]
|
||||||
|
=== Other meta-field
|
||||||
|
|
||||||
|
<<mapping-meta-field,`_meta`>>::
|
||||||
|
|
||||||
|
Application specific metadata.
|
||||||
|
|
||||||
include::fields/id-field.asciidoc[]
|
|
||||||
|
|
||||||
include::fields/source-field.asciidoc[]
|
|
||||||
|
|
||||||
include::fields/all-field.asciidoc[]
|
include::fields/all-field.asciidoc[]
|
||||||
|
|
||||||
include::fields/field-names-field.asciidoc[]
|
include::fields/field-names-field.asciidoc[]
|
||||||
|
|
||||||
|
include::fields/id-field.asciidoc[]
|
||||||
|
|
||||||
|
include::fields/index-field.asciidoc[]
|
||||||
|
|
||||||
|
include::fields/meta-field.asciidoc[]
|
||||||
|
|
||||||
include::fields/parent-field.asciidoc[]
|
include::fields/parent-field.asciidoc[]
|
||||||
|
|
||||||
include::fields/routing-field.asciidoc[]
|
include::fields/routing-field.asciidoc[]
|
||||||
|
|
||||||
include::fields/size-field.asciidoc[]
|
include::fields/size-field.asciidoc[]
|
||||||
|
|
||||||
|
include::fields/source-field.asciidoc[]
|
||||||
|
|
||||||
include::fields/timestamp-field.asciidoc[]
|
include::fields/timestamp-field.asciidoc[]
|
||||||
|
|
||||||
include::fields/ttl-field.asciidoc[]
|
include::fields/ttl-field.asciidoc[]
|
||||||
|
|
||||||
|
include::fields/type-field.asciidoc[]
|
||||||
|
|
||||||
|
include::fields/uid-field.asciidoc[]
|
||||||
|
|
||||||
|
|
|
@ -151,82 +151,18 @@ PUT my_index
|
||||||
<1> The `_all` field is disabled for the `my_type` type.
|
<1> The `_all` field is disabled for the `my_type` type.
|
||||||
<2> The `query_string` query will default to querying the `content` field in this index.
|
<2> The `query_string` query will default to querying the `content` field in this index.
|
||||||
|
|
||||||
[[include-in-all]]
|
[[excluding-from-all]]
|
||||||
==== Including specific fields in `_all`
|
==== Excluding fields from `_all`
|
||||||
|
|
||||||
Individual fields can be included or excluded from the `_all` field with the
|
Individual fields can be included or excluded from the `_all` field with the
|
||||||
`include_in_all` setting, which defaults to `true`:
|
<<include-in-all,`include_in_all`>> setting.
|
||||||
|
|
||||||
[source,js]
|
|
||||||
--------------------------------
|
|
||||||
PUT my_index
|
|
||||||
{
|
|
||||||
"mappings": {
|
|
||||||
"my_type": {
|
|
||||||
"properties": {
|
|
||||||
"title": { <1>
|
|
||||||
"type": "string"
|
|
||||||
},
|
|
||||||
"content": { <1>
|
|
||||||
"type": "string"
|
|
||||||
},
|
|
||||||
"date": { <2>
|
|
||||||
"type": "date",
|
|
||||||
"include_in_all": false
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
--------------------------------
|
|
||||||
// AUTOSENSE
|
|
||||||
|
|
||||||
<1> The `title` and `content` fields will be included in the `_all` field.
|
|
||||||
<2> The `date` field will not be included in the `_all` field.
|
|
||||||
|
|
||||||
The `include_in_all` parameter can also be set at the type level and on
|
|
||||||
<<mapping-object-type,`object`>> or <<mapping-nested-type,`nested`>> fields,
|
|
||||||
in which case all sub-fields inherit that setting. For instance:
|
|
||||||
|
|
||||||
[source,js]
|
|
||||||
--------------------------------
|
|
||||||
PUT my_index
|
|
||||||
{
|
|
||||||
"mappings": {
|
|
||||||
"my_type": {
|
|
||||||
"include_in_all": false, <1>
|
|
||||||
"properties": {
|
|
||||||
"title": { "type": "string" },
|
|
||||||
"author": {
|
|
||||||
"include_in_all": true, <2>
|
|
||||||
"properties": {
|
|
||||||
"first_name": { "type": "string" },
|
|
||||||
"last_name": { "type": "string" }
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"editor": {
|
|
||||||
"properties": {
|
|
||||||
"first_name": { "type": "string" }, <3>
|
|
||||||
"last_name": { "type": "string", "include_in_all": true } <3>
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
--------------------------------
|
|
||||||
// AUTOSENSE
|
|
||||||
|
|
||||||
<1> All fields in `my_type` are excluded from `_all`.
|
|
||||||
<2> The `author.first_name` and `author.last_name` fields are included in `_all`.
|
|
||||||
<3> Only the `editor.last_name` field is included in `_all`.
|
|
||||||
The `editor.first_name` inherits the type-level setting and is excluded.
|
|
||||||
|
|
||||||
[[all-field-and-boosting]]
|
[[all-field-and-boosting]]
|
||||||
==== Index boosting and the `_all` field
|
==== Index boosting and the `_all` field
|
||||||
|
|
||||||
Individual fields can be _boosted_ at index time, with the `boost` parameter.
|
Individual fields can be _boosted_ at index time, with the <<index-boost,`boost`>>
|
||||||
The `_all` field takes these boosts into account:
|
parameter. The `_all` field takes these boosts into account:
|
||||||
|
|
||||||
[source,js]
|
[source,js]
|
||||||
--------------------------------
|
--------------------------------
|
||||||
|
|
|
@ -2,8 +2,8 @@
|
||||||
=== `_id` field
|
=== `_id` field
|
||||||
|
|
||||||
Each document indexed is associated with a <<mapping-type-field,`_type`>> (see
|
Each document indexed is associated with a <<mapping-type-field,`_type`>> (see
|
||||||
<<all-mapping-types,Mapping Types>>) and an <<mapping-id-field,`_id`>>. The
|
<<mapping-type>>) and an <<mapping-id-field,`_id`>>. The `_id` field is not
|
||||||
`_id` field is not indexed as its value can be derived automatically from the
|
indexed as its value can be derived automatically from the
|
||||||
<<mapping-uid-field,`_uid`>> field.
|
<<mapping-uid-field,`_uid`>> field.
|
||||||
|
|
||||||
The value of the `_id` field is accessible in queries and scripts, but _not_
|
The value of the `_id` field is accessible in queries and scripts, but _not_
|
||||||
|
|
|
@ -0,0 +1,30 @@
|
||||||
|
[[mapping-meta-field]]
|
||||||
|
=== `_meta` field
|
||||||
|
|
||||||
|
Each mapping type can have custom meta data associated with it. These are not
|
||||||
|
used at all by Elasticsearch, but can be used to store application-specific
|
||||||
|
metadata, such as the class that a document belongs to:
|
||||||
|
|
||||||
|
[source,js]
|
||||||
|
--------------------------------------------------
|
||||||
|
PUT my_index
|
||||||
|
{
|
||||||
|
"mappings": {
|
||||||
|
"user": {
|
||||||
|
"_meta": { <1>
|
||||||
|
"class": "MyApp::User",
|
||||||
|
"version": {
|
||||||
|
"min": "1.0",
|
||||||
|
"max": "1.3"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
--------------------------------------------------
|
||||||
|
// AUTOSENSE
|
||||||
|
<1> This `_meta` info can be retrieved with the
|
||||||
|
<<indices-get-mapping,GET mapping>> API.
|
||||||
|
|
||||||
|
The `_meta` field can be updated on an existing type using the
|
||||||
|
<<indices-put-mapping,PUT mapping>> API.
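
A minimal sketch of such an update, reusing the hypothetical `user` type from the example above (the new `max` value is illustrative):

[source,js]
--------------------------------------------------
PUT my_index/_mapping/user
{
  "_meta": {
    "class": "MyApp::User",
    "version": {
      "min": "1.0",
      "max": "1.4" <1>
    }
  }
}
--------------------------------------------------
// AUTOSENSE
<1> Only the `max` version differs from the original mapping in this hypothetical update.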
|
|
@ -78,8 +78,7 @@ stored.
|
||||||
WARNING: Removing fields from the `_source` has similar downsides to disabling
|
WARNING: Removing fields from the `_source` has similar downsides to disabling
|
||||||
`_source`, especially the fact that you cannot reindex documents from one
|
`_source`, especially the fact that you cannot reindex documents from one
|
||||||
Elasticsearch index to another. Consider using
|
Elasticsearch index to another. Consider using
|
||||||
<<search-request-source-filtering,source filtering>> or a
|
<<search-request-source-filtering,source filtering>> instead.
|
||||||
<<mapping-transform,transform script>> instead.
|
|
||||||
|
|
||||||
The `includes`/`excludes` parameters (which also accept wildcards) can be used
|
The `includes`/`excludes` parameters (which also accept wildcards) can be used
|
||||||
as follows:
|
as follows:
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
[[mapping-ttl-field]]
|
[[mapping-ttl-field]]
|
||||||
=== `_ttl`
|
=== `_ttl` field
|
||||||
|
|
||||||
Some types of documents, such as session data or special offers, come with an
|
Some types of documents, such as session data or special offers, come with an
|
||||||
expiration date. The `_ttl` field allows you to specify the minimum time a
|
expiration date. The `_ttl` field allows you to specify the minimum time a
|
||||||
|
|
|
@ -2,8 +2,8 @@
|
||||||
=== `_type` field
|
=== `_type` field
|
||||||
|
|
||||||
Each document indexed is associated with a <<mapping-type-field,`_type`>> (see
|
Each document indexed is associated with a <<mapping-type-field,`_type`>> (see
|
||||||
<<all-mapping-types,Mapping Types>>) and an <<mapping-id-field,`_id`>>. The
|
<<mapping-type>>) and an <<mapping-id-field,`_id`>>. The `_type` field is
|
||||||
`_type` field is indexed in order to make searching by type name fast.
|
indexed in order to make searching by type name fast.
|
||||||
|
|
||||||
The value of the `_type` field is accessible in queries, aggregations,
|
The value of the `_type` field is accessible in queries, aggregations,
|
||||||
scripts, and when sorting:
|
scripts, and when sorting:
|
||||||
|
|
|
@ -2,8 +2,8 @@
|
||||||
=== `_uid` field
|
=== `_uid` field
|
||||||
|
|
||||||
Each document indexed is associated with a <<mapping-type-field,`_type`>> (see
|
Each document indexed is associated with a <<mapping-type-field,`_type`>> (see
|
||||||
<<all-mapping-types,Mapping Types>>) and an <<mapping-id-field,`_id`>>. These
|
<<mapping-type>>) and an <<mapping-id-field,`_id`>>. These values are
|
||||||
values are combined as `{type}#{id}` and indexed as the `_uid` field.
|
combined as `{type}#{id}` and indexed as the `_uid` field.
|
||||||
|
|
||||||
The value of the `_uid` field is accessible in queries, aggregations, scripts,
|
The value of the `_uid` field is accessible in queries, aggregations, scripts,
|
||||||
and when sorting:
|
and when sorting:
|
||||||
|
|
|
@ -1,25 +0,0 @@
|
||||||
[[mapping-meta]]
|
|
||||||
== Meta
|
|
||||||
|
|
||||||
Each mapping can have custom meta data associated with it. These are
|
|
||||||
simple storage elements that are persisted along with the mapping
|
|
||||||
and can be retrieved when fetching the mapping definition. The meta is
|
|
||||||
defined under the `_meta` element, for example:
|
|
||||||
|
|
||||||
[source,js]
|
|
||||||
--------------------------------------------------
|
|
||||||
{
|
|
||||||
"tweet" : {
|
|
||||||
"_meta" : {
|
|
||||||
"attr1" : "value1",
|
|
||||||
"attr2" : {
|
|
||||||
"attr3" : "value3"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
--------------------------------------------------
|
|
||||||
|
|
||||||
Meta can be handy, for example, for client libraries that perform
|
|
||||||
serialization and deserialization to store their meta model (for example,
|
|
||||||
the class the document maps to).
|
|
|
@ -0,0 +1,100 @@
|
||||||
|
[[mapping-params]]
|
||||||
|
== Mapping parameters
|
||||||
|
|
||||||
|
The following pages provide detailed explanations of the various mapping
|
||||||
|
parameters that are used by <<mapping-types,field mappings>>:
|
||||||
|
|
||||||
|
|
||||||
|
The following mapping parameters are common to some or all field datatypes:
|
||||||
|
|
||||||
|
* <<analyzer,`analyzer`>>
|
||||||
|
* <<index-boost,`boost`>>
|
||||||
|
* <<coerce,`coerce`>>
|
||||||
|
* <<copy-to,`copy_to`>>
|
||||||
|
* <<doc-values,`doc_values`>>
|
||||||
|
* <<dynamic,`dynamic`>>
|
||||||
|
* <<enabled,`enabled`>>
|
||||||
|
* <<fielddata,`fielddata`>>
|
||||||
|
* <<geohash,`geohash`>>
|
||||||
|
* <<geohash-precision,`geohash_precision`>>
|
||||||
|
* <<geohash-prefix,`geohash_prefix`>>
|
||||||
|
* <<mapping-date-format,`format`>>
|
||||||
|
* <<ignore-above,`ignore_above`>>
|
||||||
|
* <<ignore-malformed,`ignore_malformed`>>
|
||||||
|
* <<include-in-all,`include_in_all`>>
|
||||||
|
* <<index-options,`index_options`>>
|
||||||
|
* <<lat-lon,`lat_lon`>>
|
||||||
|
* <<mapping-index,`index`>>
|
||||||
|
* <<multi-fields,`fields`>>
|
||||||
|
* <<norms,`norms`>>
|
||||||
|
* <<null-value,`null_value`>>
|
||||||
|
* <<position-offset-gap,`position_offset_gap`>>
|
||||||
|
* <<properties,`properties`>>
|
||||||
|
* <<search-analyzer,`search_analyzer`>>
|
||||||
|
* <<similarity,`similarity`>>
|
||||||
|
* <<mapping-store,`store`>>
|
||||||
|
* <<term-vector,`term_vector`>>
|
||||||
|
|
||||||
|
|
||||||
|
include::params/analyzer.asciidoc[]
|
||||||
|
|
||||||
|
include::params/boost.asciidoc[]
|
||||||
|
|
||||||
|
include::params/coerce.asciidoc[]
|
||||||
|
|
||||||
|
include::params/copy-to.asciidoc[]
|
||||||
|
|
||||||
|
include::params/doc-values.asciidoc[]
|
||||||
|
|
||||||
|
include::params/dynamic.asciidoc[]
|
||||||
|
|
||||||
|
include::params/enabled.asciidoc[]
|
||||||
|
|
||||||
|
include::params/fielddata.asciidoc[]
|
||||||
|
|
||||||
|
include::params/format.asciidoc[]
|
||||||
|
|
||||||
|
include::params/geohash.asciidoc[]
|
||||||
|
|
||||||
|
include::params/geohash-precision.asciidoc[]
|
||||||
|
|
||||||
|
include::params/geohash-prefix.asciidoc[]
|
||||||
|
|
||||||
|
include::params/ignore-above.asciidoc[]
|
||||||
|
|
||||||
|
include::params/ignore-malformed.asciidoc[]
|
||||||
|
|
||||||
|
include::params/include-in-all.asciidoc[]
|
||||||
|
|
||||||
|
include::params/index.asciidoc[]
|
||||||
|
|
||||||
|
include::params/index-options.asciidoc[]
|
||||||
|
|
||||||
|
include::params/lat-lon.asciidoc[]
|
||||||
|
|
||||||
|
include::params/multi-fields.asciidoc[]
|
||||||
|
|
||||||
|
include::params/norms.asciidoc[]
|
||||||
|
|
||||||
|
include::params/null-value.asciidoc[]
|
||||||
|
|
||||||
|
include::params/position-offset-gap.asciidoc[]
|
||||||
|
|
||||||
|
include::params/precision-step.asciidoc[]
|
||||||
|
|
||||||
|
include::params/properties.asciidoc[]
|
||||||
|
|
||||||
|
include::params/search-analyzer.asciidoc[]
|
||||||
|
|
||||||
|
include::params/similarity.asciidoc[]
|
||||||
|
|
||||||
|
include::params/store.asciidoc[]
|
||||||
|
|
||||||
|
include::params/term-vector.asciidoc[]
|
||||||
|
|
||||||
|
|
||||||
|
[source,js]
|
||||||
|
--------------------------------------------------
|
||||||
|
--------------------------------------------------
|
||||||
|
// AUTOSENSE
|
||||||
|
|
|
@ -0,0 +1,80 @@
|
||||||
|
[[analyzer]]
|
||||||
|
=== `analyzer`
|
||||||
|
|
||||||
|
The values of <<mapping-index,`analyzed`>> string fields are passed through an
|
||||||
|
<<analysis,analyzer>> to convert the string into a stream of _tokens_ or
|
||||||
|
_terms_. For instance, the string `"The quick Brown Foxes."` may, depending
|
||||||
|
on which analyzer is used, be analyzed to the tokens: `quick`, `brown`,
|
||||||
|
`fox`. These are the actual terms that are indexed for the field, which makes
|
||||||
|
it possible to search efficiently for individual words _within_ big blobs of
|
||||||
|
text.
|
||||||
|
|
||||||
|
This analysis process needs to happen not just at index time, but also at
|
||||||
|
query time: the query string needs to be passed through the same (or a
|
||||||
|
similar) analyzer so that the terms that it tries to find are in the same
|
||||||
|
format as those that exist in the index.
|
||||||
|
|
||||||
|
Elasticsearch ships with a number of <<analysis-analyzers,pre-defined analyzers>>,
|
||||||
|
which can be used without further configuration. It also ships with many
|
||||||
|
<<analysis-charfilters,character filters>>, <<analysis-tokenizers,tokenizers>>,
|
||||||
|
and <<analysis-tokenfilters,token filters>>, which can be combined to configure
|
||||||
|
custom analyzers per index.
|
||||||
|
|
||||||
|
Analyzers can be specified per-query, per-field or per-index. At index time,
|
||||||
|
Elasticsearch will look for an analyzer in this order:
|
||||||
|
|
||||||
|
* The `analyzer` defined in the field mapping.
|
||||||
|
* An analyzer named `default` in the index settings.
|
||||||
|
* The <<analysis-standard-analyzer,`standard`>> analyzer.
|
||||||
|
|
||||||
|
At query time, there are a few more layers:
|
||||||
|
|
||||||
|
* The `analyzer` defined in a <<full-text-queries,full-text query>>.
|
||||||
|
* The `search_analyzer` defined in the field mapping.
|
||||||
|
* The `analyzer` defined in the field mapping.
|
||||||
|
* An analyzer named `default_search` in the index settings.
|
||||||
|
* An analyzer named `default` in the index settings.
|
||||||
|
* The <<analysis-standard-analyzer,`standard`>> analyzer.
|
||||||
|
|
||||||
|
The easiest way to specify an analyzer for a particular field is to define it
|
||||||
|
in the field mapping, as follows:
|
||||||
|
|
||||||
|
[source,js]
|
||||||
|
--------------------------------------------------
|
||||||
|
PUT my_index
|
||||||
|
{
|
||||||
|
"mappings": {
|
||||||
|
"my_type": {
|
||||||
|
"properties": {
|
||||||
|
"text": { <1>
|
||||||
|
"type": "string",
|
||||||
|
"fields": {
|
||||||
|
"english": { <2>
|
||||||
|
"type": "string",
|
||||||
|
"analyzer": "english"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
GET my_index/_analyze?field=text <3>
|
||||||
|
{
|
||||||
|
"text": "The quick Brown Foxes."
|
||||||
|
}
|
||||||
|
|
||||||
|
GET my_index/_analyze?field=text.english <4>
|
||||||
|
{
|
||||||
|
"text": "The quick Brown Foxes."
|
||||||
|
}
|
||||||
|
--------------------------------------------------
|
||||||
|
// AUTOSENSE
|
||||||
|
<1> The `text` field uses the default `standard` analyzer.
|
||||||
|
<2> The `text.english` <<multi-fields,multi-field>> uses the `english` analyzer, which removes stop words and applies stemming.
|
||||||
|
<3> This returns the tokens: [ `the`, `quick`, `brown`, `foxes` ].
|
||||||
|
<4> This returns the tokens: [ `quick`, `brown`, `fox` ].
|
||||||
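
The query-time list above also allows an `analyzer` to be set on a full-text query itself; a minimal sketch against the index defined above (the choice of `whitespace` here is purely illustrative):

[source,js]
--------------------------------------------------
GET my_index/_search
{
  "query": {
    "match": {
      "text": {
        "query": "The quick Brown Foxes.",
        "analyzer": "whitespace" <1>
      }
    }
  }
}
--------------------------------------------------
// AUTOSENSE
<1> The `whitespace` analyzer is used for this query only, taking precedence over the analyzers configured in the mapping and index settings.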
|
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,59 @@
|
||||||
|
[[index-boost]]
|
||||||
|
=== `boost`
|
||||||
|
|
||||||
|
Individual fields can be _boosted_ -- count more towards the relevance score
|
||||||
|
-- at index time, with the `boost` parameter as follows:
|
||||||
|
|
||||||
|
[source,js]
|
||||||
|
--------------------------------------------------
|
||||||
|
PUT my_index
|
||||||
|
{
|
||||||
|
"mappings": {
|
||||||
|
"my_type": {
|
||||||
|
"properties": {
|
||||||
|
"title": {
|
||||||
|
"type": "string",
|
||||||
|
"boost": 2 <1>
|
||||||
|
},
|
||||||
|
"content": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
--------------------------------------------------
|
||||||
|
// AUTOSENSE
|
||||||
|
|
||||||
|
<1> Matches on the `title` field will have twice the weight of those on the
|
||||||
|
`content` field, which has the default `boost` of `1.0`.
|
||||||
|
|
||||||
|
Note that a `title` field will usually be shorter than a `content` field. The
|
||||||
|
default relevance calculation takes field length into account, so a short
|
||||||
|
`title` field will have a higher natural boost than a long `content` field.
|
||||||
|
|
||||||
|
[WARNING]
|
||||||
|
.Why index time boosting is a bad idea
|
||||||
|
==================================================
|
||||||
|
|
||||||
|
We advise against using index time boosting for the following reasons:
|
||||||
|
|
||||||
|
* You cannot change index-time `boost` values without reindexing all of your
|
||||||
|
documents.
|
||||||
|
|
||||||
|
* Every query supports query-time boosting which achieves the same effect. The
|
||||||
|
difference is that you can tweak the `boost` value without having to reindex.
|
||||||
|
|
||||||
|
* Index-time boosts are stored as part of the <<norms,`norm`>>, which is only one
|
||||||
|
byte. This reduces the resolution of the field length normalization factor
|
||||||
|
which can lead to lower quality relevance calculations.
|
||||||
|
|
||||||
|
==================================================
|
||||||
|
|
||||||
|
The only advantage that index time boosting has is that it is copied with the
|
||||||
|
value into the <<mapping-all-field,`_all`>> field. This means that, when
|
||||||
|
querying the `_all` field, words that originated from the `title` field will
|
||||||
|
have a higher score than words that originated in the `content` field.
|
||||||
|
This functionality comes at a cost: queries on the `_all` field are slower
|
||||||
|
when index-time boosting is used.
|
||||||
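
As the warning above notes, the same effect can be achieved at query time without reindexing; a minimal sketch (the query text is illustrative):

[source,js]
--------------------------------------------------
GET my_index/_search
{
  "query": {
    "bool": {
      "should": [
        {
          "match": {
            "title": {
              "query": "quick brown fox",
              "boost": 2 <1>
            }
          }
        },
        {
          "match": {
            "content": "quick brown fox"
          }
        }
      ]
    }
  }
}
--------------------------------------------------
// AUTOSENSE
<1> Matches on `title` count twice as much as matches on `content`, and the boost can be changed per request.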
|
|
|
@ -0,0 +1,89 @@
|
||||||
|
[[coerce]]
|
||||||
|
=== `coerce`
|
||||||
|
|
||||||
|
Data is not always clean. Depending on how it is produced, a number might be
|
||||||
|
rendered in the JSON body as a true JSON number, e.g. `5`, but it might also
|
||||||
|
be rendered as a string, e.g. `"5"`. Alternatively, a number that should be
|
||||||
|
an integer might instead be rendered as a floating point, e.g. `5.0`, or even
|
||||||
|
`"5.0"`.
|
||||||
|
|
||||||
|
Coercion attempts to clean up dirty values to fit the datatype of a field.
|
||||||
|
For instance:
|
||||||
|
|
||||||
|
* Strings will be coerced to numbers.
|
||||||
|
* Floating points will be truncated for integer values.
|
||||||
|
* Lon/lat geo-points will be normalized to a standard -180:180 / -90:90 coordinate system.
|
||||||
|
|
||||||
|
For instance:
|
||||||
|
|
||||||
|
[source,js]
|
||||||
|
--------------------------------------------------
|
||||||
|
PUT my_index
|
||||||
|
{
|
||||||
|
"mappings": {
|
||||||
|
"my_type": {
|
||||||
|
"properties": {
|
||||||
|
"number_one": {
|
||||||
|
"type": "integer"
|
||||||
|
},
|
||||||
|
"number_two": {
|
||||||
|
"type": "integer",
|
||||||
|
"coerce": false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
PUT my_index/my_type/1
|
||||||
|
{
|
||||||
|
"number_one": "10" <1>
|
||||||
|
}
|
||||||
|
|
||||||
|
PUT my_index/my_type/2
|
||||||
|
{
|
||||||
|
"number_two": "10" <2>
|
||||||
|
}
|
||||||
|
--------------------------------------------------
|
||||||
|
// AUTOSENSE
|
||||||
|
<1> The `number_one` field will contain the integer `10`.
|
||||||
|
<2> This document will be rejected because coercion is disabled.
|
||||||
|
|
||||||
|
[[coerce-setting]]
|
||||||
|
==== Index-level default
|
||||||
|
|
||||||
|
The `index.mapping.coerce` setting can be set on the index level to disable
|
||||||
|
coercion globally across all mapping types:
|
||||||
|
|
||||||
|
[source,js]
|
||||||
|
--------------------------------------------------
|
||||||
|
PUT my_index
|
||||||
|
{
|
||||||
|
"settings": {
|
||||||
|
"index.mapping.coerce": false
|
||||||
|
},
|
||||||
|
"mappings": {
|
||||||
|
"my_type": {
|
||||||
|
"properties": {
|
||||||
|
"number_one": {
|
||||||
|
"type": "integer"
|
||||||
|
},
|
||||||
|
"number_two": {
|
||||||
|
"type": "integer",
|
||||||
|
"coerce": true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
PUT my_index/my_type/1
|
||||||
|
{ "number_one": "10" } <1>
|
||||||
|
|
||||||
|
PUT my_index/my_type/2
|
||||||
|
{ "number_two": "10" } <2>
|
||||||
|
--------------------------------------------------
|
||||||
|
// AUTOSENSE
|
||||||
|
<1> This document will be rejected because the `number_one` field inherits the index-level coercion setting.
|
||||||
|
<2> The `number_two` field overrides the index level setting to enable coercion.
|
||||||
|
|
|
@ -0,0 +1,64 @@
|
||||||
|
[[copy-to]]
|
||||||
|
=== `copy_to`
|
||||||
|
|
||||||
|
The `copy_to` parameter allows you to create custom
|
||||||
|
<<mapping-all-field,`_all`>> fields. In other words, the values of multiple
|
||||||
|
fields can be copied into a group field, which can then be queried as a single
|
||||||
|
field. For instance, the `first_name` and `last_name` fields can be copied to
|
||||||
|
the `full_name` field as follows:
|
||||||
|
|
||||||
|
[source,js]
|
||||||
|
--------------------------------------------------
|
||||||
|
PUT /my_index
|
||||||
|
{
|
||||||
|
"mappings": {
|
||||||
|
"my_type": {
|
||||||
|
"properties": {
|
||||||
|
"first_name": {
|
||||||
|
"type": "string",
|
||||||
|
"copy_to": "full_name" <1>
|
||||||
|
},
|
||||||
|
"last_name": {
|
||||||
|
"type": "string",
|
||||||
|
"copy_to": "full_name" <1>
|
||||||
|
},
|
||||||
|
"full_name": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
PUT /my_index/my_type/1
|
||||||
|
{
|
||||||
|
"first_name": "John",
|
||||||
|
"last_name": "Smith"
|
||||||
|
}
|
||||||
|
|
||||||
|
GET /my_index/_search
|
||||||
|
{
|
||||||
|
"query": {
|
||||||
|
"match": {
|
||||||
|
"full_name": { <2>
|
||||||
|
"query": "John Smith",
|
||||||
|
"operator": "and"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
--------------------------------------------------
|
||||||
|
// AUTOSENSE
|
||||||
|
<1> The values of the `first_name` and `last_name` fields are copied to the
|
||||||
|
`full_name` field.
|
||||||
|
|
||||||
|
<2> The `first_name` and `last_name` fields can still be queried for the
|
||||||
|
first name and last name respectively, but the `full_name` field can be
|
||||||
|
queried for both first and last names.
|
||||||
|
|
||||||
|
Some important points:
|
||||||
|
|
||||||
|
* It is the field _value_ which is copied, not the terms (which result from the analysis process).
|
||||||
|
* The original <<mapping-source-field,`_source`>> field will not be modified to show the copied values.
|
||||||
|
* The same value can be copied to multiple fields, with `"copy_to": [ "field_1", "field_2" ]`
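
For instance, a minimal sketch of copying one value into two group fields (the field names are illustrative):

[source,js]
--------------------------------------------------
PUT my_index
{
  "mappings": {
    "my_type": {
      "properties": {
        "first_name": {
          "type": "string",
          "copy_to": [ "full_name", "all_names" ] <1>
        },
        "full_name":  { "type": "string" },
        "all_names":  { "type": "string" }
      }
    }
  }
}
--------------------------------------------------
// AUTOSENSE
<1> The value of `first_name` is copied to both the `full_name` and the `all_names` fields.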
|
|
@ -0,0 +1,46 @@
|
||||||
|
[[doc-values]]
|
||||||
|
=== `doc_values`
|
||||||
|
|
||||||
|
Most fields are <<mapping-index,indexed>> by default, which makes them
|
||||||
|
searchable. The inverted index allows queries to look up the search term in
|
||||||
|
a unique sorted list of terms, and from that immediately have access to the list
|
||||||
|
of documents that contain the term.
|
||||||
|
|
||||||
|
Sorting, aggregations, and access to field values in scripts require a
|
||||||
|
different data access pattern. Instead of looking up the term and finding
|
||||||
|
documents, we need to be able to look up the document and find the terms that
|
||||||
|
it has in a field.
|
||||||
|
|
||||||
|
Doc values are the on-disk data structure, built at document index time, which
|
||||||
|
makes this data access pattern possible. Doc values are supported on almost
|
||||||
|
all field types, with the __notable exception of `analyzed` string fields__.
|
||||||
|
|
||||||
|
All fields which support doc values have them enabled by default. If you are
|
||||||
|
sure that you don't need to sort or aggregate on a field, or access the field
|
||||||
|
value from a script, you can disable doc values in order to save disk space:
|
||||||
|
|
||||||
|
[source,js]
|
||||||
|
--------------------------------------------------
|
||||||
|
PUT my_index
|
||||||
|
{
|
||||||
|
"mappings": {
|
||||||
|
"my_type": {
|
||||||
|
"properties": {
|
||||||
|
"status_code": { <1>
|
||||||
|
"type": "string",
|
||||||
|
"index": "not_analyzed"
|
||||||
|
},
|
||||||
|
"session_id": { <2>
|
||||||
|
"type": "string",
|
||||||
|
"index": "not_analyzed",
|
||||||
|
"doc_values": false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
--------------------------------------------------
|
||||||
|
// AUTOSENSE
|
||||||
|
<1> The `status_code` field has `doc_values` enabled by default.
|
||||||
|
<2> The `session_id` has `doc_values` disabled, but can still be queried.
|
||||||
|
|
|
@ -0,0 +1,87 @@
|
||||||
|
[[dynamic]]
|
||||||
|
=== `dynamic`
|
||||||
|
|
||||||
|
By default, fields can be added _dynamically_ to a document, or to
|
||||||
|
<<object,inner objects>> within a document, just by indexing a document
|
||||||
|
containing the new field. For instance:
|
||||||
|
|
||||||
|
[source,js]
|
||||||
|
--------------------------------------------------
|
||||||
|
DELETE my_index <1>
|
||||||
|
|
||||||
|
PUT my_index/my_type/1 <2>
|
||||||
|
{
|
||||||
|
"username": "johnsmith",
|
||||||
|
"name": {
|
||||||
|
"first": "John",
|
||||||
|
"last": "Smith"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
GET my_index/_mapping <3>
|
||||||
|
|
||||||
|
PUT my_index/my_type/2 <4>
|
||||||
|
{
|
||||||
|
"username": "marywhite",
|
||||||
|
"email": "mary@white.com",
|
||||||
|
"name": {
|
||||||
|
"first": "Mary",
|
||||||
|
"middle": "Alice",
|
||||||
|
"last": "White"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
GET my_index/_mapping <5>
|
||||||
|
--------------------------------------------------
|
||||||
|
// AUTOSENSE
|
||||||
|
<1> First delete the index, in case it already exists.
|
||||||
|
<2> This document introduces the string field `username`, the object field
|
||||||
|
`name`, and two string fields under the `name` object which can be
|
||||||
|
referred to as `name.first` and `name.last`.
|
||||||
|
<3> Check the mapping to verify the above.
|
||||||
|
<4> This document adds two string fields: `email` and `name.middle`.
|
||||||
|
<5> Check the mapping to verify the changes.
|
||||||
|
|
||||||
|
The details of how new fields are detected and added to the mapping are explained in <<dynamic-mapping>>.
|
||||||
|
|
||||||
|
The `dynamic` setting controls whether new fields can be added dynamically or
|
||||||
|
not. It accepts three settings:
|
||||||
|
|
||||||
|
[horizontal]
|
||||||
|
`true`:: Newly detected fields are added to the mapping. (default)
|
||||||
|
`false`:: Newly detected fields are ignored. New fields must be added explicitly.
|
||||||
|
`strict`:: If new fields are detected, an exception is thrown and the document is rejected.
|
||||||
|
|
||||||
|
The `dynamic` setting may be set at the mapping type level, and on each
|
||||||
|
<<object,inner object>>. Inner objects inherit the setting from their parent
|
||||||
|
object or from the mapping type. For instance:
|
||||||
|
|
||||||
|
[source,js]
|
||||||
|
--------------------------------------------------
|
||||||
|
PUT my_index
|
||||||
|
{
|
||||||
|
"mappings": {
|
||||||
|
"my_type": {
|
||||||
|
"dynamic": false, <1>
|
||||||
|
"properties": {
|
||||||
|
"user": { <2>
|
||||||
|
"properties": {
|
||||||
|
"name": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"social_networks": { <3>
|
||||||
|
"dynamic": true,
|
||||||
|
"properties": {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
--------------------------------------------------
|
||||||
|
// AUTOSENSE
|
||||||
|
<1> Dynamic mapping is disabled at the type level, so no new top-level fields will be added dynamically.
|
||||||
|
<2> The `user` object inherits the type-level setting.
|
||||||
|
<3> The `user.social_networks` object enables dynamic mapping, so new fields may be added to this inner object.
|
||||||
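
The `strict` option is not shown above; a minimal sketch of how it behaves (the type and field names are illustrative):

[source,js]
--------------------------------------------------
PUT my_index
{
  "mappings": {
    "my_type": {
      "dynamic": "strict", <1>
      "properties": {
        "title": { "type": "string" }
      }
    }
  }
}

PUT my_index/my_type/1
{
  "title": "Some title",
  "author": "John Smith" <2>
}
--------------------------------------------------
// AUTOSENSE
<1> No new fields may be added to `my_type`, either at the top level or in inner objects that inherit the setting.
<2> This document is rejected because the `author` field is not defined in the mapping.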
|
|
|
@ -0,0 +1,94 @@
|
||||||
|
[[enabled]]
|
||||||
|
=== `enabled`
|
||||||
|
|
||||||
|
Elasticsearch tries to index all of the fields you give it, but sometimes you
|
||||||
|
want to just store the field without indexing it. For instance, imagine that
|
||||||
|
you are using Elasticsearch as a web session store. You may want to index the
|
||||||
|
session ID and last update time, but you don't need to query or run
|
||||||
|
aggregations on the session data itself.
|
||||||
|
|
||||||
|
The `enabled` setting, which can be applied only to the mapping type and to
|
||||||
|
<<object,`object`>> fields, causes Elasticsearch to skip parsing of the
|
||||||
|
contents of the field entirely. The JSON can still be retrieved from the
|
||||||
|
<<mapping-source-field,`_source`>> field, but it is not searchable or stored
|
||||||
|
in any other way:
|
||||||
|
|
||||||
|
[source,js]
|
||||||
|
--------------------------------------------------
|
||||||
|
PUT my_index
|
||||||
|
{
|
||||||
|
"mappings": {
|
||||||
|
"session": {
|
||||||
|
"properties": {
|
||||||
|
"user_id": {
|
||||||
|
"type": "string",
|
||||||
|
"index": "not_analyzed"
|
||||||
|
},
|
||||||
|
"last_updated": {
|
||||||
|
"type": "date"
|
||||||
|
},
|
||||||
|
"session_data": { <1>
|
||||||
|
"enabled": false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
PUT my_index/session/session_1
|
||||||
|
{
|
||||||
|
"user_id": "kimchy",
|
||||||
|
"session_data": { <2>
|
||||||
|
"arbitrary_object": {
|
||||||
|
"some_array": [ "foo", "bar", { "baz": 2 } ]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"last_updated": "2015-12-06T18:20:22"
|
||||||
|
}
|
||||||
|
|
||||||
|
PUT my_index/session/session_2
|
||||||
|
{
|
||||||
|
"user_id": "jpountz",
|
||||||
|
"session_data": "none", <3>
|
||||||
|
"last_updated": "2015-12-06T18:22:13"
|
||||||
|
}
|
||||||
|
--------------------------------------------------
|
||||||
|
// AUTOSENSE
|
||||||
|
<1> The `session_data` field is disabled.
|
||||||
|
<2> Any arbitrary data can be passed to the `session_data` field as it will be entirely ignored.
|
||||||
|
<3> The `session_data` will also ignore values that are not JSON objects.
|
||||||
|
|
||||||
|
The entire mapping type may be disabled as well, in which case the document is
|
||||||
|
stored in the <<mapping-source-field,`_source`>> field, which means it can be
|
||||||
|
retrieved, but none of its contents are indexed in any way:
|
||||||
|
|
||||||
|
[source,js]
|
||||||
|
--------------------------------------------------
|
||||||
|
PUT my_index
|
||||||
|
{
|
||||||
|
"mappings": {
|
||||||
|
"session": { <1>
|
||||||
|
"enabled": false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
PUT my_index/session/session_1
|
||||||
|
{
|
||||||
|
"user_id": "kimchy",
|
||||||
|
"session_data": {
|
||||||
|
"arbitrary_object": {
|
||||||
|
"some_array": [ "foo", "bar", { "baz": 2 } ]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"last_updated": "2015-12-06T18:20:22"
|
||||||
|
}
|
||||||
|
|
||||||
|
GET my_index/session/session_1 <2>
|
||||||
|
|
||||||
|
GET my_index/_mapping <3>
|
||||||
|
--------------------------------------------------
|
||||||
|
// AUTOSENSE
|
||||||
|
<1> The entire `session` mapping type is disabled.
|
||||||
|
<2> The document can be retrieved.
|
||||||
|
<3> Checking the mapping reveals that no fields have been added.
|
|
@ -0,0 +1,225 @@
|
||||||
|
[[fielddata]]
|
||||||
|
=== `fielddata`
|
||||||
|
|
||||||
|
Most fields are <<mapping-index,indexed>> by default, which makes them
|
||||||
|
searchable. The inverted index allows queries to look up the search term in
|
||||||
|
a unique sorted list of terms, and from that immediately have access to the list
|
||||||
|
of documents that contain the term.
|
||||||
|
|
||||||
|
Sorting, aggregations, and access to field values in scripts require a
|
||||||
|
different data access pattern. Instead of looking up the term and finding
|
||||||
|
documents, we need to be able to look up the document and find the terms that
|
||||||
|
it has in a field.
|
||||||
|
|
||||||
|
Most fields can use index-time, on-disk <<doc-values,`doc_values`>> to support
|
||||||
|
this type of data access pattern, but `analyzed` string fields do not support
|
||||||
|
`doc_values`.
|
||||||
|
|
||||||
|
Instead, `analyzed` strings use a query-time data structure called
|
||||||
|
`fielddata`. This data structure is built on demand the first time that a
|
||||||
|
field is used for aggregations, sorting, or is accessed in a script. It is built
|
||||||
|
by reading the entire inverted index for each segment from disk, inverting the
|
||||||
|
term ↔︎ document relationship, and storing the result in memory, in the
|
||||||
|
JVM heap.
|
||||||
|
|
||||||
|
|
||||||
|
Loading fielddata is an expensive process so, once it has been loaded, it
|
||||||
|
remains in memory for the lifetime of the segment.
|
||||||
|
|
||||||
|
[WARNING]
|
||||||
|
.Fielddata can fill up your heap space
|
||||||
|
==============================================================================
|
||||||
|
Fielddata can consume a lot of heap space, especially when loading high
|
||||||
|
cardinality `analyzed` string fields. Most of the time, it doesn't make sense
|
||||||
|
to sort or aggregate on `analyzed` string fields (with the notable exception
|
||||||
|
of the
|
||||||
|
<<search-aggregations-bucket-significantterms-aggregation,`significant_terms`>>
|
||||||
|
aggregation). Always think about whether a `not_analyzed` field (which can
|
||||||
|
use `doc_values`) would be a better fit for your use case.
|
||||||
|
==============================================================================
|
||||||
|
|
||||||
|
[[fielddata-format]]
|
||||||
|
==== `fielddata.format`
|
||||||
|
|
||||||
|
For `analyzed` string fields, the fielddata `format` controls whether
|
||||||
|
fielddata should be enabled or not. It accepts: `disabled` and `paged_bytes`
|
||||||
|
(enabled, which is the default). To disable fielddata loading, you can use
|
||||||
|
the following mapping:
|
||||||
|
|
||||||
|
[source,js]
|
||||||
|
--------------------------------------------------
|
||||||
|
PUT my_index
|
||||||
|
{
|
||||||
|
"mappings": {
|
||||||
|
"my_type": {
|
||||||
|
"properties": {
|
||||||
|
"text": {
|
||||||
|
"type": "string",
|
||||||
|
"fielddata": {
|
||||||
|
"format": "disabled" <1>
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
--------------------------------------------------
|
||||||
|
// AUTOSENSE
|
||||||
|
<1> The `text` field cannot be used for sorting, aggregations, or in scripts.
|
||||||
|
|
||||||
|
.Fielddata and other datatypes
|
||||||
|
[NOTE]
|
||||||
|
==================================================
|
||||||
|
|
||||||
|
Historically, other field datatypes also used fielddata, but this has been replaced
|
||||||
|
by index-time, disk-based <<doc-values,`doc_values`>>.
|
||||||
|
|
||||||
|
==================================================
|
||||||
|
|
||||||
|
|
||||||
|
[[fielddata-loading]]
|
||||||
|
==== `fielddata.loading`
|
||||||
|
|
||||||
|
This per-field setting controls when fielddata is loaded into memory. It
|
||||||
|
accepts three options:
|
||||||
|
|
||||||
|
[horizontal]
|
||||||
|
`lazy`::
|
||||||
|
|
||||||
|
Fielddata is only loaded into memory when it is needed. (default)
|
||||||
|
|
||||||
|
`eager`::
|
||||||
|
|
||||||
|
Fielddata is loaded into memory before a new search segment becomes
|
||||||
|
visible to search. This can reduce the latency that a user may experience
|
||||||
|
if their search request has to trigger lazy loading from a big segment.
|
||||||
|
|
||||||
|
`eager_global_ordinals`::
|
||||||
|
|
||||||
|
Loading fielddata into memory is only part of the work that is required.
|
||||||
|
After loading the fielddata for each segment, Elasticsearch builds the
|
||||||
|
<<global-ordinals>> data structure to make a list of all unique terms
|
||||||
|
across all the segments in a shard. By default, global ordinals are built
|
||||||
|
lazily. If the field has a very high cardinality, global ordinals may
|
||||||
|
take some time to build, in which case you can use eager loading instead.
|
||||||
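
For example, a minimal sketch of enabling eager loading of fielddata and global ordinals on an `analyzed` string field (the field name is illustrative):

[source,js]
--------------------------------------------------
PUT my_index
{
  "mappings": {
    "my_type": {
      "properties": {
        "tag": {
          "type": "string",
          "fielddata": {
            "loading": "eager_global_ordinals" <1>
          }
        }
      }
    }
  }
}
--------------------------------------------------
// AUTOSENSE
<1> Fielddata and global ordinals for the `tag` field are built before a new segment becomes visible to search.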
|
|
||||||
|
[[global-ordinals]]
|
||||||
|
.Global ordinals
|
||||||
|
*****************************************
|
||||||
|
|
||||||
|
Global ordinals are a data structure on top of fielddata and doc values that
|
||||||
|
maintains an incremental numbering for each unique term in a lexicographic
|
||||||
|
order. Each term has a unique number and the number of term 'A' is lower than
|
||||||
|
the number of term 'B'. Global ordinals are only supported on string fields.
|
||||||
|
|
||||||
|
Fielddata and doc values also have ordinals, which is a unique numbering for all terms
|
||||||
|
in a particular segment and field. Global ordinals just build on top of this,
|
||||||
|
by providing a mapping between the segment ordinals and the global ordinals,
|
||||||
|
the latter being unique across the entire shard.
|
||||||
|
|
||||||
|
Global ordinals are used for features that use segment ordinals, such as
|
||||||
|
sorting and the terms aggregation, to improve the execution time. A terms
|
||||||
|
aggregation relies purely on global ordinals to perform the aggregation at the
|
||||||
|
shard level, then converts global ordinals to the real term only for the final
|
||||||
|
reduce phase, which combines results from different shards.
|
||||||
|
|
||||||
|
Global ordinals for a specified field are tied to _all the segments of a
|
||||||
|
shard_, while fielddata and doc values ordinals are tied to a single segment.
|
||||||
|
For this reason, global ordinals need to be entirely rebuilt
|
||||||
|
whenever a new segment becomes visible.
|
||||||
|
|
||||||
|
The loading time of global ordinals depends on the number of terms in a field, but in general
|
||||||
|
it is low, since the source field data has already been loaded. The memory overhead of global
|
||||||
|
ordinals is small because it is very efficiently compressed. Eager loading of global ordinals
|
||||||
|
can move the loading time from the first search request to the refresh itself.
|
||||||
|
|
||||||
|
*****************************************
|
||||||
|
|
||||||
|
[[field-data-filtering]]
|
||||||
|
==== `fielddata.filter`
|
||||||
|
|
||||||
|
Fielddata filtering can be used to reduce the number of terms loaded into
|
||||||
|
memory, and thus reduce memory usage. Terms can be filtered by _frequency_ or
|
||||||
|
by _regular expression_, or a combination of the two:
|
||||||
|
|
||||||
|
Filtering by frequency::
|
||||||
|
+
|
||||||
|
--
|
||||||
|
|
||||||
|
The frequency filter allows you to only load terms whose term frequency falls
|
||||||
|
between a `min` and `max` value, which can be expressed as an absolute
|
||||||
|
number (when the number is bigger than 1.0) or as a percentage
|
||||||
|
(eg `0.01` is `1%` and `1.0` is `100%`). Frequency is calculated
|
||||||
|
*per segment*. Percentages are based on the number of docs which have a
|
||||||
|
value for the field, as opposed to all docs in the segment.
|
||||||
|
|
||||||
|
Small segments can be excluded completely by specifying the minimum
|
||||||
|
number of docs that the segment should contain with `min_segment_size`:
|
||||||
|
|
||||||
|
[source,js]
|
||||||
|
--------------------------------------------------
|
||||||
|
PUT my_index
|
||||||
|
{
|
||||||
|
"mappings": {
|
||||||
|
"my_type": {
|
||||||
|
"properties": {
|
||||||
|
"tag": {
|
||||||
|
"type": "string",
|
||||||
|
"fielddata": {
|
||||||
|
"filter": {
|
||||||
|
"frequency": {
|
||||||
|
"min": 0.001,
|
||||||
|
"max": 0.1,
|
||||||
|
"min_segment_size": 500
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
--------------------------------------------------
|
||||||
|
// AUTOSENSE
|
||||||
|
--
|
||||||
|
|
||||||
|
Filtering by regex::
|
||||||
|
+
|
||||||
|
--
|
||||||
|
Terms can also be filtered by regular expression - only values which
|
||||||
|
match the regular expression are loaded. Note: the regular expression is
|
||||||
|
applied to each term in the field, not to the whole field value. For
|
||||||
|
instance, to only load hashtags from a tweet, we can use a regular
|
||||||
|
expression which matches terms beginning with `#`:
|
||||||
|
|
||||||
|
[source,js]
|
||||||
|
--------------------------------------------------
|
||||||
|
PUT my_index
|
||||||
|
{
|
||||||
|
"mappings": {
|
||||||
|
"my_type": {
|
||||||
|
"properties": {
|
||||||
|
"tweet": {
|
||||||
|
"type": "string",
|
||||||
|
"analyzer": "whitespace",
|
||||||
|
"fielddata": {
|
||||||
|
"filter": {
|
||||||
|
"regex": {
|
||||||
|
"pattern": "^#.*"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
--------------------------------------------------
|
||||||
|
// AUTOSENSE
|
||||||
|
--
|
||||||
|
|
||||||
|
These filters can be updated on an existing field mapping and will take
|
||||||
|
effect the next time the fielddata for a segment is loaded. Use the
|
||||||
|
<<indices-clearcache,Clear Cache>> API
|
||||||
|
to reload the fielddata using the new filters.
|
|
@ -0,0 +1,281 @@
|
||||||
|
[[mapping-date-format]]
|
||||||
|
=== `format`
|
||||||
|
|
||||||
|
In JSON documents, dates are represented as strings. Elasticsearch uses a set
|
||||||
|
of preconfigured formats to recognize and parse these strings into a long
|
||||||
|
value representing _milliseconds-since-the-epoch_ in UTC.
|
||||||
|
|
||||||
|
Besides the <<built-in-date-formats,built-in formats>>, your own
|
||||||
|
<<custom-date-formats,custom formats>> can be specified using the familiar
|
||||||
|
`yyyy/MM/dd` syntax:
|
||||||
|
|
||||||
|
[source,js]
|
||||||
|
--------------------------------------------------
|
||||||
|
PUT my_index
|
||||||
|
{
|
||||||
|
"mappings": {
|
||||||
|
"my_type": {
|
||||||
|
"properties": {
|
||||||
|
"date": {
|
||||||
|
"type": "date",
|
||||||
|
"format": "yyyy-MM-dd"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
--------------------------------------------------
|
||||||
|
// AUTOSENSE
|
||||||
|
|
||||||
|
Many APIs which support date values also support <<date-math,date math>>
|
||||||
|
expressions, such as `now-1m/d` -- the current time, minus one month, rounded
|
||||||
|
down to the nearest day.
|
||||||
|
|
||||||
|
[[custom-date-formats]]
|
||||||
|
==== Custom date formats
|
||||||
|
|
||||||
|
Completely customizable date formats are supported. The syntax for these is explained
|
||||||
|
http://www.joda.org/joda-time/apidocs/org/joda/time/format/DateTimeFormat.html[in the Joda docs].
|
||||||
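
For illustration, a minimal sketch using a custom Joda-style pattern (the pattern itself is arbitrary):

[source,js]
--------------------------------------------------
PUT my_index
{
  "mappings": {
    "my_type": {
      "properties": {
        "date": {
          "type": "date",
          "format": "MM/dd/yyyy HH:mm:ss" <1>
        }
      }
    }
  }
}
--------------------------------------------------
// AUTOSENSE
<1> A value like `"12/06/2015 18:20:22"` parses with this mapping, while `"2015-12-06"` would be rejected.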
|
|
||||||
|
[[built-in-date-formats]]
|
||||||
|
==== Built In Formats
|
||||||
|
|
||||||
|
Most of the formats below have a `strict` companion format, which means that the
|
||||||
|
year, month, and day parts of the date must have leading zeros in order
|
||||||
|
to be valid. This means that a date like `5/11/1` would not be valid, but
|
||||||
|
you would need to specify the full date, which would be `2005/11/01` in this
|
||||||
|
example. So instead of `date_optional_time` you would need to specify
|
||||||
|
`strict_date_optional_time`.
|
||||||
|
|
||||||
|
The following table lists all the default ISO formats supported:
|
||||||
|
|
||||||
|
`epoch_millis`::
|
||||||
|
|
||||||
|
A formatter for the number of milliseconds since the epoch. Note that
|
||||||
|
this timestamp allows a maximum length of 13 chars, so dates earlier than 1653
|
||||||
|
or later than 2286 are not supported. You should use a different date formatter in
|
||||||
|
that case.
|
||||||
|
|
||||||
|
`epoch_second`::
|
||||||
|
|
||||||
|
A formatter for the number of seconds since the epoch. Note that this
|
||||||
|
timestamp allows a maximum length of 10 chars, so dates earlier than 1653 or
|
||||||
|
later than 2286 are not supported. You should use a different date formatter in that
|
||||||
|
case.
|
||||||
|
|
||||||
|
[[strict-date-time]]`date_optional_time` or `strict_date_optional_time`::
|
||||||
|
|
||||||
|
A generic ISO datetime parser where the date is mandatory and the time is
|
||||||
|
optional.
|
||||||
|
http://www.joda.org/joda-time/apidocs/org/joda/time/format/ISODateTimeFormat.html#dateOptionalTimeParser--[Full details here].
|
||||||
|
|
||||||
|
`basic_date`::
|
||||||
|
|
||||||
|
A basic formatter for a full date as four digit year, two digit month of
|
||||||
|
year, and two digit day of month: `yyyyMMdd`.
|
||||||
|
|
||||||
|
`basic_date_time`::
|
||||||
|
|
||||||
|
A basic formatter that combines a basic date and time, separated by a 'T':
|
||||||
|
`yyyyMMdd'T'HHmmss.SSSZ`.
|
||||||
|
|
||||||
|
`basic_date_time_no_millis`::
|
||||||
|
|
||||||
|
A basic formatter that combines a basic date and time without millis,
|
||||||
|
separated by a 'T': `yyyyMMdd'T'HHmmssZ`.
|
||||||
|
|
||||||
|
`basic_ordinal_date`::
|
||||||
|
|
||||||
|
A formatter for a full ordinal date, using a four digit year and three
|
||||||
|
digit dayOfYear: `yyyyDDD`.
|
||||||
|
|
||||||
|
`basic_ordinal_date_time`::
|
||||||
|
|
||||||
|
A formatter for a full ordinal date and time, using a four digit year and
|
||||||
|
three digit dayOfYear: `yyyyDDD'T'HHmmss.SSSZ`.
|
||||||
|
|
||||||
|
`basic_ordinal_date_time_no_millis`::
|
||||||
|
|
||||||
|
A formatter for a full ordinal date and time without millis, using a four
|
||||||
|
digit year and three digit dayOfYear: `yyyyDDD'T'HHmmssZ`.
|
||||||
|
|
||||||
|
`basic_time`::
|
||||||
|
|
||||||
|
A basic formatter for a two digit hour of day, two digit minute of hour,
|
||||||
|
two digit second of minute, three digit millis, and time zone offset:
|
||||||
|
`HHmmss.SSSZ`.
|
||||||
|
|
||||||
|
`basic_time_no_millis`::
|
||||||
|
|
||||||
|
A basic formatter for a two digit hour of day, two digit minute of hour,
|
||||||
|
two digit second of minute, and time zone offset: `HHmmssZ`.
|
||||||
|
|
||||||
|
`basic_t_time`::
|
||||||
|
|
||||||
|
A basic formatter for a two digit hour of day, two digit minute of hour,
|
||||||
|
two digit second of minute, three digit millis, and time zone off set
|
||||||
|
prefixed by 'T': `'T'HHmmss.SSSZ`.
|
||||||
|
|
||||||
|
`basic_t_time_no_millis`::
|
||||||
|
|
||||||
|
A basic formatter for a two digit hour of day, two digit minute of hour,
|
||||||
|
two digit second of minute, and time zone offset prefixed by 'T':
|
||||||
|
`'T'HHmmssZ`.
|
||||||
|
|
||||||
|
`basic_week_date` or `strict_basic_week_date`::
|
||||||
|
|
||||||
|
A basic formatter for a full date as four digit weekyear, two digit week
|
||||||
|
of weekyear, and one digit day of week: `xxxx'W'wwe`.
|
||||||
|
|
||||||
|
`basic_week_date_time` or `strict_basic_week_date_time`::
|
||||||
|
|
||||||
|
A basic formatter that combines a basic weekyear date and time, separated
|
||||||
|
by a 'T': `xxxx'W'wwe'T'HHmmss.SSSZ`.
|
||||||
|
|
||||||
|
`basic_week_date_time_no_millis` or `strict_basic_week_date_time_no_millis`::
|
||||||
|
|
||||||
|
A basic formatter that combines a basic weekyear date and time without
|
||||||
|
millis, separated by a 'T': `xxxx'W'wwe'T'HHmmssZ`.
|
||||||
|
|
||||||
|
`date` or `strict_date`::
|
||||||
|
|
||||||
|
A formatter for a full date as four digit year, two digit month of year,
|
||||||
|
and two digit day of month: `yyyy-MM-dd`.
|
||||||
|
|
||||||
|
`date_hour` or `strict_date_hour`::
|
||||||
|
|
||||||
|
A formatter that combines a full date and two digit hour of day.
|
||||||
|
|
||||||
|
`date_hour_minute` or `strict_date_hour_minute`::
|
||||||
|
|
||||||
|
A formatter that combines a full date, two digit hour of day, and two
|
||||||
|
digit minute of hour.
|
||||||
|
|
||||||
|
`date_hour_minute_second` or `strict_date_hour_minute_second`::
|
||||||
|
|
||||||
|
A formatter that combines a full date, two digit hour of day, two digit
|
||||||
|
minute of hour, and two digit second of minute.
|
||||||
|
|
||||||
|
`date_hour_minute_second_fraction` or `strict_date_hour_minute_second_fraction`::
|
||||||
|
|
||||||
|
A formatter that combines a full date, two digit hour of day, two digit
|
||||||
|
minute of hour, two digit second of minute, and three digit fraction of
|
||||||
|
second: `yyyy-MM-dd'T'HH:mm:ss.SSS`.
|
||||||
|
|
||||||
|
`date_hour_minute_second_millis` or `strict_date_hour_minute_second_millis`::
|
||||||
|
|
||||||
|
A formatter that combines a full date, two digit hour of day, two digit
|
||||||
|
minute of hour, two digit second of minute, and three digit fraction of
|
||||||
|
second: `yyyy-MM-dd'T'HH:mm:ss.SSS`.
|
||||||
|
|
||||||
|
`date_time` or `strict_date_time`::
|
||||||
|
|
||||||
|
A formatter that combines a full date and time, separated by a 'T':
|
||||||
|
`yyyy-MM-dd'T'HH:mm:ss.SSSZZ`.
|
||||||
|
|
||||||
|
`date_time_no_millis` or `strict_date_time_no_millis`::
|
||||||
|
|
||||||
|
A formatter that combines a full date and time without millis, separated
|
||||||
|
by a 'T': `yyyy-MM-dd'T'HH:mm:ssZZ`.
|
||||||
|
|
||||||
|
`hour` or `strict_hour`::
|
||||||
|
|
||||||
|
A formatter for a two digit hour of day.
|
||||||
|
|
||||||
|
`hour_minute` or `strict_hour_minute`::
|
||||||
|
|
||||||
|
A formatter for a two digit hour of day and two digit minute of hour.
|
||||||
|
|
||||||
|
`hour_minute_second` or `strict_hour_minute_second`::
|
||||||
|
|
||||||
|
A formatter for a two digit hour of day, two digit minute of hour, and two
digit second of minute.

`hour_minute_second_fraction` or `strict_hour_minute_second_fraction`::

A formatter for a two digit hour of day, two digit minute of hour, two
digit second of minute, and three digit fraction of second: `HH:mm:ss.SSS`.

`hour_minute_second_millis` or `strict_hour_minute_second_millis`::

A formatter for a two digit hour of day, two digit minute of hour, two
digit second of minute, and three digit fraction of second: `HH:mm:ss.SSS`.

`ordinal_date` or `strict_ordinal_date`::

A formatter for a full ordinal date, using a four digit year and three
digit dayOfYear: `yyyy-DDD`.

`ordinal_date_time` or `strict_ordinal_date_time`::

A formatter for a full ordinal date and time, using a four digit year and
three digit dayOfYear: `yyyy-DDD'T'HH:mm:ss.SSSZZ`.

`ordinal_date_time_no_millis` or `strict_ordinal_date_time_no_millis`::

A formatter for a full ordinal date and time without millis, using a four
digit year and three digit dayOfYear: `yyyy-DDD'T'HH:mm:ssZZ`.

`time` or `strict_time`::

A formatter for a two digit hour of day, two digit minute of hour, two
digit second of minute, three digit fraction of second, and time zone
offset: `HH:mm:ss.SSSZZ`.

`time_no_millis` or `strict_time_no_millis`::

A formatter for a two digit hour of day, two digit minute of hour, two
digit second of minute, and time zone offset: `HH:mm:ssZZ`.

`t_time` or `strict_t_time`::

A formatter for a two digit hour of day, two digit minute of hour, two
digit second of minute, three digit fraction of second, and time zone
offset prefixed by 'T': `'T'HH:mm:ss.SSSZZ`.

`t_time_no_millis` or `strict_t_time_no_millis`::

A formatter for a two digit hour of day, two digit minute of hour, two
digit second of minute, and time zone offset prefixed by 'T': `'T'HH:mm:ssZZ`.

`week_date` or `strict_week_date`::

A formatter for a full date as four digit weekyear, two digit week of
weekyear, and one digit day of week: `xxxx-'W'ww-e`.

`week_date_time` or `strict_week_date_time`::

A formatter that combines a full weekyear date and time, separated by a
'T': `xxxx-'W'ww-e'T'HH:mm:ss.SSSZZ`.

`week_date_time_no_millis` or `strict_week_date_time_no_millis`::

A formatter that combines a full weekyear date and time without millis,
separated by a 'T': `xxxx-'W'ww-e'T'HH:mm:ssZZ`.

`weekyear` or `strict_weekyear`::

A formatter for a four digit weekyear.

`weekyear_week` or `strict_weekyear_week`::

A formatter for a four digit weekyear and two digit week of weekyear.

`weekyear_week_day` or `strict_weekyear_week_day`::

A formatter for a four digit weekyear, two digit week of weekyear, and one
digit day of week.

`year` or `strict_year`::

A formatter for a four digit year.

`year_month` or `strict_year_month`::

A formatter for a four digit year and two digit month of year.

`year_month_day` or `strict_year_month_day`::

A formatter for a four digit year, two digit month of year, and two digit
day of month.
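
Any of the formats above is applied to a `date` field through its `format`
mapping parameter. The following is a minimal sketch of that usage; the index,
type, and field names are invented for illustration:

[source,js]
--------------------------------------------------
PUT my_index
{
  "mappings": {
    "my_type": {
      "properties": {
        "published": {
          "type": "date",
          "format": "strict_year_month_day" <1>
        }
      }
    }
  }
}

PUT my_index/my_type/1
{
  "published": "2015-06-01" <2>
}
--------------------------------------------------
// AUTOSENSE
<1> Dates must match `yyyy-MM-dd`, with a four digit year and two digit month and day.
<2> This value parses with the `strict_year_month_day` format; a value like `2015-6-1` would be rejected.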
[[geohash-precision]]
=== `geohash_precision`

Geohashes are a form of lat/lon encoding which divides the earth up into
a grid. Each cell in this grid is represented by a geohash string. Each
cell in turn can be further subdivided into smaller cells which are
represented by a longer string. So the longer the geohash, the smaller
(and thus more accurate) the cell is.

The `geohash_precision` setting controls the length of the geohash that is
indexed when the <<geohash,`geohash`>> option is enabled, and the maximum
geohash length when the <<geohash-prefix,`geohash_prefix`>> option is enabled.

It accepts:

* a number between 1 and 12 (default), which represents the length of the geohash.
* a <<distance-units,distance>>, e.g. `1km`.

If a distance is specified, it will be translated to the smallest
geohash-length that will provide the requested resolution.

For example:

[source,js]
--------------------------------------------------
PUT my_index
{
  "mappings": {
    "my_type": {
      "properties": {
        "location": {
          "type": "geo_point",
          "geohash_prefix": true,
          "geohash_precision": 6 <1>
        }
      }
    }
  }
}

PUT my_index/my_type/1
{
  "location": {
    "lat": 41.12,
    "lon": -71.34
  }
}

GET my_index/_search?fielddata_fields=location.geohash
{
  "query": {
    "term": {
      "location.geohash": "drm3bt"
    }
  }
}
--------------------------------------------------
// AUTOSENSE
<1> A `geohash_precision` of 6 equates to geohash cells of approximately 1.26km x 0.6km
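
Because a distance is also accepted, the same mapping can be expressed in
terms of the resolution you want rather than a geohash length; a minimal
sketch, with an illustrative index name and distance:

[source,js]
--------------------------------------------------
PUT my_index_2
{
  "mappings": {
    "my_type": {
      "properties": {
        "location": {
          "type": "geo_point",
          "geohash_prefix": true,
          "geohash_precision": "1km" <1>
        }
      }
    }
  }
}
--------------------------------------------------
// AUTOSENSE
<1> The distance is translated to the smallest geohash length whose cells still provide roughly 1km resolution.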
[[geohash-prefix]]
=== `geohash_prefix`

Geohashes are a form of lat/lon encoding which divides the earth up into
a grid. Each cell in this grid is represented by a geohash string. Each
cell in turn can be further subdivided into smaller cells which are
represented by a longer string. So the longer the geohash, the smaller
(and thus more accurate) the cell is.

While the <<geohash,`geohash`>> option enables indexing the geohash that
corresponds to the lat/lon point, at the specified
<<geohash-precision,precision>>, the `geohash_prefix` option will also
index all the enclosing cells.

For instance, a geohash of `drm3btev3e86` will index all of the following
terms: [ `d`, `dr`, `drm`, `drm3`, `drm3b`, `drm3bt`, `drm3bte`, `drm3btev`,
`drm3btev3`, `drm3btev3e`, `drm3btev3e8`, `drm3btev3e86` ].

The geohash prefixes can be used with the
<<query-dsl-geohash-cell-query,`geohash_cell` query>> to find points within a
particular geohash, or its neighbours:

[source,js]
--------------------------------------------------
PUT my_index
{
  "mappings": {
    "my_type": {
      "properties": {
        "location": {
          "type": "geo_point",
          "geohash_prefix": true,
          "geohash_precision": 6
        }
      }
    }
  }
}

PUT my_index/my_type/1
{
  "location": {
    "lat": 41.12,
    "lon": -71.34
  }
}

GET my_index/_search?fielddata_fields=location.geohash
{
  "query": {
    "geohash_cell": {
      "location": {
        "lat": 41.02,
        "lon": -71.48
      },
      "precision": 4, <1>
      "neighbors": true <1>
    }
  }
}
--------------------------------------------------
// AUTOSENSE
<1> The `precision` and `neighbors` options control which geohash cell, and whether its neighbouring cells, are searched.
[[geohash]]
=== `geohash`

Geohashes are a form of lat/lon encoding which divides the earth up into
a grid. Each cell in this grid is represented by a geohash string. Each
cell in turn can be further subdivided into smaller cells which are
represented by a longer string. So the longer the geohash, the smaller
(and thus more accurate) the cell is.

Because geohashes are just strings, they can be stored in an inverted
index like any other string, which makes querying them very efficient.

If you enable the `geohash` option, a `geohash` ``sub-field'' (e.g.
`location.geohash`) will be indexed. The length of the geohash is controlled
by the <<geohash-precision,`geohash_precision`>> parameter.

If the <<geohash-prefix,`geohash_prefix`>> option is enabled, the `geohash`
option will be enabled automatically.

For example:

[source,js]
--------------------------------------------------
PUT my_index
{
  "mappings": {
    "my_type": {
      "properties": {
        "location": {
          "type": "geo_point", <1>
          "geohash": true
        }
      }
    }
  }
}

PUT my_index/my_type/1
{
  "location": {
    "lat": 41.12,
    "lon": -71.34
  }
}

GET my_index/_search?fielddata_fields=location.geohash <2>
{
  "query": {
    "prefix": {
      "location.geohash": "drm3b" <3>
    }
  }
}
--------------------------------------------------
// AUTOSENSE
<1> A `location.geohash` field will be indexed for each geo-point.
<2> The geohash can be retrieved with <<doc-values,`doc_values`>>.
<3> A <<query-dsl-prefix-query,`prefix`>> query can find all geohashes which start with a particular prefix.

[WARNING]
============================================

A `prefix` query on geohashes is expensive. Instead, consider using the
<<geohash-prefix,`geohash_prefix`>> option to pay the expense once at index
time instead of on every query.

============================================
[[ignore-above]]
=== `ignore_above`

Strings longer than the `ignore_above` setting will not be processed by the
<<analyzer,analyzer>> and will not be indexed. This is mainly useful for
<<mapping-index,`not_analyzed`>> string fields, which are typically used for
filtering, aggregations, and sorting. These are structured fields and it
doesn't usually make sense to allow very long terms to be indexed in these
fields.

[source,js]
--------------------------------------------------
PUT my_index
{
  "mappings": {
    "my_type": {
      "properties": {
        "message": {
          "type": "string",
          "index": "not_analyzed",
          "ignore_above": 20 <1>
        }
      }
    }
  }
}

PUT my_index/my_type/1 <2>
{
  "message": "Syntax error"
}

PUT my_index/my_type/2 <3>
{
  "message": "Syntax error with some long stacktrace"
}

GET _search <4>
{
  "aggs": {
    "messages": {
      "terms": {
        "field": "message"
      }
    }
  }
}
--------------------------------------------------
// AUTOSENSE
<1> This field will ignore any string longer than 20 characters.
<2> This document is indexed successfully.
<3> This document will be indexed, but without indexing the `message` field.
<4> Search returns both documents, but only the first is present in the terms aggregation.

This option is also useful for protecting against Lucene's term byte-length
limit of `32766`.

NOTE: The value for `ignore_above` is the _character count_, but Lucene counts
bytes. If you use UTF-8 text with many non-ASCII characters, you may want to
set the limit to `32766 / 3 = 10922` since UTF-8 characters may occupy at most
3 bytes.
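
A common pattern (a sketch; the index and field names are illustrative) is to
leave the analyzed field untouched for full text search and apply
`ignore_above` only to a `not_analyzed` <<multi-fields,multi-field>> used for
aggregations:

[source,js]
--------------------------------------------------
PUT my_index_2
{
  "mappings": {
    "my_type": {
      "properties": {
        "message": {
          "type": "string",
          "fields": {
            "raw": {
              "type": "string",
              "index": "not_analyzed",
              "ignore_above": 256 <1>
            }
          }
        }
      }
    }
  }
}
--------------------------------------------------
// AUTOSENSE
<1> Long messages remain searchable through the analyzed `message` field, but values longer than 256 characters are left out of the `message.raw` terms.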
[[ignore-malformed]]
=== `ignore_malformed`

Sometimes you don't have much control over the data that you receive. One
user may send a `login` field that is a <<date,`date`>>, and another sends a
`login` field that is an email address.

Trying to index the wrong datatype into a field throws an exception by
default, and rejects the whole document. The `ignore_malformed` parameter, if
set to `true`, allows the exception to be ignored. The malformed field is not
indexed, but other fields in the document are processed normally.

For example:

[source,js]
--------------------------------------------------
PUT my_index
{
  "mappings": {
    "my_type": {
      "properties": {
        "number_one": {
          "type": "integer"
        },
        "number_two": {
          "type": "integer",
          "ignore_malformed": true
        }
      }
    }
  }
}

PUT my_index/my_type/1
{
  "text": "Some text value",
  "number_one": "foo" <1>
}

PUT my_index/my_type/2
{
  "text": "Some text value",
  "number_two": "foo" <2>
}
--------------------------------------------------
// AUTOSENSE
<1> This document will be rejected because `number_one` does not allow malformed values.
<2> This document will have the `text` field indexed, but not the `number_two` field.


[[ignore-malformed-setting]]
==== Index-level default

The `index.mapping.ignore_malformed` setting can be set at the index level to
ignore malformed content globally across all mapping types.

[source,js]
--------------------------------------------------
PUT my_index
{
  "settings": {
    "index.mapping.ignore_malformed": true <1>
  },
  "mappings": {
    "my_type": {
      "properties": {
        "number_one": { <1>
          "type": "byte"
        },
        "number_two": {
          "type": "integer",
          "ignore_malformed": false <2>
        }
      }
    }
  }
}
--------------------------------------------------
// AUTOSENSE

<1> The `number_one` field inherits the index-level setting.
<2> The `number_two` field overrides the index-level setting to turn off `ignore_malformed`.
[[include-in-all]]
=== `include_in_all`

The `include_in_all` parameter provides per-field control over which fields
are included in the <<mapping-all-field,`_all`>> field. It defaults to `true`, unless <<mapping-index,`index`>> is set to `no`.

This example demonstrates how to exclude the `date` field from the `_all` field:

[source,js]
--------------------------------
PUT my_index
{
  "mappings": {
    "my_type": {
      "properties": {
        "title": { <1>
          "type": "string"
        },
        "content": { <1>
          "type": "string"
        },
        "date": { <2>
          "type": "date",
          "include_in_all": false
        }
      }
    }
  }
}
--------------------------------
// AUTOSENSE

<1> The `title` and `content` fields will be included in the `_all` field.
<2> The `date` field will not be included in the `_all` field.

The `include_in_all` parameter can also be set at the type level and on
<<object,`object`>> or <<nested,`nested`>> fields, in which case all
sub-fields inherit that setting. For instance:

[source,js]
--------------------------------
PUT my_index
{
  "mappings": {
    "my_type": {
      "include_in_all": false, <1>
      "properties": {
        "title": { "type": "string" },
        "author": {
          "include_in_all": true, <2>
          "properties": {
            "first_name": { "type": "string" },
            "last_name": { "type": "string" }
          }
        },
        "editor": {
          "properties": {
            "first_name": { "type": "string" }, <3>
            "last_name": { "type": "string", "include_in_all": true } <3>
          }
        }
      }
    }
  }
}
--------------------------------
// AUTOSENSE

<1> All fields in `my_type` are excluded from `_all`.
<2> The `author.first_name` and `author.last_name` fields are included in `_all`.
<3> Only the `editor.last_name` field is included in `_all`.
The `editor.first_name` inherits the type-level setting and is excluded.

[NOTE]
.Multi-fields and `include_in_all`
=================================

The original field value is added to the `_all` field, not the terms produced
by a field's analyzer. For this reason, it makes no sense to set
`include_in_all` to `true` on <<multi-fields,multi-fields>>, as each
multi-field has exactly the same value as its parent.

=================================
[[index-options]]
=== `index_options`

The `index_options` parameter controls what information is added to the
inverted index, for search and highlighting purposes. It accepts the
following settings:

[horizontal]
`docs`::

Only the doc number is indexed. Can answer the question _Does this term
exist in this field?_

`freqs`::

Doc number and term frequencies are indexed. Term frequencies are used to
score repeated terms higher than single terms.

`positions`::

Doc number, term frequencies, and term positions (or order) are indexed.
Positions can be used for
<<query-dsl-match-query-phrase,proximity or phrase queries>>.

`offsets`::

Doc number, term frequencies, positions, and start and end character
offsets (which map the term back to the original string) are indexed.
Offsets are used by the <<postings-highlighter,postings highlighter>>.

<<mapping-index,Analyzed>> string fields use `positions` as the default, and
all other fields use `docs` as the default.

[source,js]
--------------------------------------------------
PUT my_index
{
  "mappings": {
    "my_type": {
      "properties": {
        "text": {
          "type": "string",
          "index_options": "offsets"
        }
      }
    }
  }
}

PUT my_index/my_type/1
{
  "text": "Quick brown fox"
}

GET my_index/_search
{
  "query": {
    "match": {
      "text": "brown fox"
    }
  },
  "highlight": {
    "fields": {
      "text": {} <1>
    }
  }
}
--------------------------------------------------
// AUTOSENSE
<1> The `text` field will use the postings highlighter by default because `offsets` are indexed.
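
Going the other way, an `analyzed` field that never needs phrase queries or
highlighting can be trimmed down from the `positions` default; a minimal
sketch, with an illustrative index and field name:

[source,js]
--------------------------------------------------
PUT my_index_2
{
  "mappings": {
    "my_type": {
      "properties": {
        "tags": {
          "type": "string",
          "index_options": "freqs" <1>
        }
      }
    }
  }
}
--------------------------------------------------
// AUTOSENSE
<1> Doc numbers and term frequencies are indexed, but positions are not, so phrase queries against `tags` will not work.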
[[mapping-index]]
=== `index`

The `index` option controls how field values are indexed and, thus, how they
are searchable. It accepts three values:

[horizontal]
`no`::

Do not add this field value to the index. With this setting, the field
will not be queryable.

`not_analyzed`::

Add the field value to the index unchanged, as a single term. This is the
default for all fields that support this option except for
<<string,`string`>> fields. `not_analyzed` fields are usually used with
<<term-level-queries,term-level queries>> for structured search.

`analyzed`::

This option applies only to `string` fields, for which it is the default.
The string field value is first <<analysis,analyzed>> to convert the
string into terms (e.g. a list of individual words), which are then
indexed. At search time, the query string is passed through
(<<search-analyzer,usually>>) the same analyzer to generate terms
in the same format as those in the index. It is this process that enables
<<full-text-queries,full text search>>.

For example, you can create a `not_analyzed` string field with the following:

[source,js]
--------------------------------------------------
PUT /my_index
{
  "mappings": {
    "my_type": {
      "properties": {
        "status_code": {
          "type": "string",
          "index": "not_analyzed"
        }
      }
    }
  }
}
--------------------------------------------------
// AUTOSENSE
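
For completeness, a field that should travel with the document but never be
searched can be mapped with `"index": "no"`; a minimal sketch, with an
illustrative field name:

[source,js]
--------------------------------------------------
PUT /my_index_2
{
  "mappings": {
    "my_type": {
      "properties": {
        "internal_notes": {
          "type": "string",
          "index": "no" <1>
        }
      }
    }
  }
}
--------------------------------------------------
// AUTOSENSE
<1> The value is still returned as part of `_source`, but no query can match on `internal_notes`.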
[[lat-lon]]
=== `lat_lon`

<<geo-queries,Geo-queries>> are usually performed by plugging the value of
each <<geo-point,`geo_point`>> field into a formula to determine whether it
falls into the required area or not. Unlike most queries, the inverted index
is not involved.

Setting `lat_lon` to `true` causes the latitude and longitude values to be
indexed as numeric fields (called `.lat` and `.lon`). These fields can be used
by the <<query-dsl-geo-bounding-box-query,`geo_bounding_box`>> and
<<query-dsl-geo-distance-query,`geo_distance`>> queries instead of
performing in-memory calculations.

[source,js]
--------------------------------------------------
PUT my_index
{
  "mappings": {
    "my_type": {
      "properties": {
        "location": {
          "type": "geo_point",
          "lat_lon": true <1>
        }
      }
    }
  }
}

PUT my_index/my_type/1
{
  "location": {
    "lat": 41.12,
    "lon": -71.34
  }
}

GET my_index/_search
{
  "query": {
    "geo_distance": {
      "location": {
        "lat": 41,
        "lon": -71
      },
      "distance": "50km",
      "optimize_bbox": "indexed" <2>
    }
  }
}
--------------------------------------------------
// AUTOSENSE
<1> Setting `lat_lon` to true indexes the geo-point in the `location.lat` and `location.lon` fields.
<2> The `indexed` option tells the geo-distance query to use the inverted index instead of the in-memory calculation.

Whether the in-memory or indexed operation performs better depends both on
your dataset and on the types of queries that you are running.

NOTE: The `lat_lon` option only makes sense for single-value `geo_point`
fields. It will not work with arrays of geo-points.
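
The same indexed `.lat` and `.lon` fields can back a `geo_bounding_box` query;
a minimal sketch, assuming the mapping above and the query-level `type`
option:

[source,js]
--------------------------------------------------
GET my_index/_search
{
  "query": {
    "geo_bounding_box": {
      "location": {
        "top_left": {
          "lat": 42,
          "lon": -72
        },
        "bottom_right": {
          "lat": 40,
          "lon": -70
        }
      },
      "type": "indexed" <1>
    }
  }
}
--------------------------------------------------
// AUTOSENSE
<1> The `indexed` execution type uses the `location.lat` and `location.lon` fields instead of the in-memory check.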
[[multi-fields]]
=== `fields`

It is often useful to index the same field in different ways for different
purposes. This is the purpose of _multi-fields_. For instance, a `string`
field could be <<mapping-index,indexed>> as an `analyzed` field for full-text
search, and as a `not_analyzed` field for sorting or aggregations:

[source,js]
--------------------------------------------------
PUT /my_index
{
  "mappings": {
    "my_type": {
      "properties": {
        "city": {
          "type": "string",
          "fields": {
            "raw": { <1>
              "type": "string",
              "index": "not_analyzed"
            }
          }
        }
      }
    }
  }
}

PUT /my_index/my_type/1
{
  "city": "New York"
}

PUT /my_index/my_type/2
{
  "city": "York"
}

GET /my_index/_search
{
  "query": {
    "match": {
      "city": "york" <2>
    }
  },
  "sort": {
    "city.raw": "asc" <3>
  },
  "aggs": {
    "Cities": {
      "terms": {
        "field": "city.raw" <3>
      }
    }
  }
}
--------------------------------------------------
// AUTOSENSE
<1> The `city.raw` field is a `not_analyzed` version of the `city` field.
<2> The analyzed `city` field can be used for full text search.
<3> The `city.raw` field can be used for sorting and aggregations.

NOTE: Multi-fields do not change the original `_source` field.

==== Multi-fields with multiple analyzers

Another use case of multi-fields is to analyze the same field in different
ways for better relevance. For instance we could index a field with the
<<analysis-standard-analyzer,`standard` analyzer>> which breaks text up into
words, and again with the <<english-analyzer,`english` analyzer>>
which stems words into their root form:

[source,js]
--------------------------------------------------
PUT my_index
{
  "mappings": {
    "my_type": {
      "properties": {
        "text": { <1>
          "type": "string",
          "fields": {
            "english": { <2>
              "type": "string",
              "analyzer": "english"
            }
          }
        }
      }
    }
  }
}

PUT my_index/my_type/1
{ "text": "quick brown fox" } <3>

PUT my_index/my_type/2
{ "text": "quick brown foxes" } <3>

GET my_index/_search
{
  "query": {
    "multi_match": {
      "query": "quick brown foxes",
      "fields": [ <4>
        "text",
        "text.english"
      ],
      "type": "most_fields" <4>
    }
  }
}
--------------------------------------------------
// AUTOSENSE

<1> The `text` field uses the `standard` analyzer.
<2> The `text.english` field uses the `english` analyzer.
<3> Index two documents, one with `fox` and the other with `foxes`.
<4> Query both the `text` and `text.english` fields and combine the scores.

The `text` field contains the term `fox` in the first document and `foxes` in
the second document. The `text.english` field contains `fox` for both
documents, because `foxes` is stemmed to `fox`.

The query string is also analyzed by the `standard` analyzer for the `text`
field, and by the `english` analyzer for the `text.english` field. The
stemmed field allows a query for `foxes` to also match the document containing
just `fox`. This allows us to match as many documents as possible. By also
querying the unstemmed `text` field, we improve the relevance score of the
document which matches `foxes` exactly.
[[norms]]
=== `norms`

Norms store various normalization factors -- a number to represent the
relative field length and the <<index-boost,index time `boost`>> setting --
that are later used at query time in order to compute the score of a document
relative to a query.

Although useful for scoring, norms also require quite a lot of memory
(typically in the order of one byte per document per field in your index, even
for documents that don't have this specific field). As a consequence, if you
don't need scoring on a specific field, you should disable norms on that
field. In particular, this is the case for fields that are used solely for
filtering or aggregations.

Norms can be disabled (but not re-enabled) after the fact, using the
<<indices-put-mapping,PUT mapping API>> like so:

[source,js]
------------
PUT my_index/_mapping/my_type
{
  "properties": {
    "title": {
      "type": "string",
      "norms": {
        "enabled": false
      }
    }
  }
}
------------
// AUTOSENSE

NOTE: Norms will not be removed instantly, but will be removed as old segments
are merged into new segments as you continue indexing new documents. Any score
computation on a field that has had norms removed might return inconsistent
results since some documents won't have norms anymore while other documents
might still have norms.

==== Lazy loading of norms

Norms can be loaded into memory eagerly (`eager`), whenever a new segment
comes online, or they can be loaded lazily (`lazy`, default), only when the
field is queried.

Eager loading can be configured as follows:

[source,js]
------------
PUT my_index/_mapping/my_type
{
  "properties": {
    "title": {
      "type": "string",
      "norms": {
        "loading": "eager"
      }
    }
  }
}
------------
// AUTOSENSE
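
Norms can equally be turned off when the field is first created, which avoids
the segment-merge delay mentioned in the note above; a minimal sketch, with an
illustrative index and field name:

[source,js]
------------
PUT my_index_2
{
  "mappings": {
    "my_type": {
      "properties": {
        "status": {
          "type": "string",
          "norms": {
            "enabled": false <1>
          }
        }
      }
    }
  }
}
------------
// AUTOSENSE
<1> No norms are written for `status`; scoring on this field ignores field length and index-time boosts.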
[[null-value]]
=== `null_value`

A `null` value cannot be indexed or searched. When a field is set to `null`
(or an empty array or an array of `null` values), it is treated as though that
field has no values.

The `null_value` parameter allows you to replace explicit `null` values with
the specified value so that it can be indexed and searched. For instance:

[source,js]
--------------------------------------------------
PUT my_index
{
  "mappings": {
    "my_type": {
      "properties": {
        "status_code": {
          "type": "string",
          "index": "not_analyzed",
          "null_value": "NULL" <1>
        }
      }
    }
  }
}

PUT my_index/my_type/1
{
  "status_code": null
}

PUT my_index/my_type/2
{
  "status_code": [] <2>
}

GET my_index/_search
{
  "query": {
    "term": {
      "status_code": "NULL" <3>
    }
  }
}
--------------------------------------------------
// AUTOSENSE
<1> Replace explicit `null` values with the term `NULL`.
<2> An empty array does not contain an explicit `null`, and so won't be replaced with the `null_value`.
<3> A query for `NULL` returns document 1, but not document 2.

IMPORTANT: The `null_value` needs to be the same datatype as the field. For
instance, a `long` field cannot have a string `null_value`. String fields
which are `analyzed` will also pass the `null_value` through the configured
analyzer.

Also see the <<query-dsl-missing-query,`missing` query>> for its `null_value` support.
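
To illustrate the datatype constraint from the note above, a numeric field
takes a numeric `null_value`; the field name and sentinel below are
illustrative:

[source,js]
--------------------------------------------------
PUT my_index_2
{
  "mappings": {
    "my_type": {
      "properties": {
        "response_time_ms": {
          "type": "integer",
          "null_value": -1 <1>
        }
      }
    }
  }
}
--------------------------------------------------
// AUTOSENSE
<1> A document sent with `"response_time_ms": null` is indexed as if the value were `-1`, so term and range queries can still find it.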
[[position-offset-gap]]
=== `position_offset_gap`

<<mapping-index,Analyzed>> string fields take term <<index-options,positions>>
into account, in order to be able to support
<<query-dsl-match-query-phrase,proximity or phrase queries>>.
When indexing an array of strings, each string of the array is indexed
directly after the previous one, almost as though all the strings in the array
had been concatenated into one big string.

This can result in matches from phrase queries spanning two array elements.
For instance:

[source,js]
--------------------------------------------------
PUT /my_index/groups/1
{
  "names": [ "John Abraham", "Lincoln Smith"]
}

GET /my_index/groups/_search
{
  "query": {
    "match_phrase": {
      "names": "Abraham Lincoln" <1>
    }
  }
}
--------------------------------------------------
// AUTOSENSE
<1> This phrase query matches our document, even though `Abraham` and `Lincoln` are in separate strings.

The `position_offset_gap` can introduce a fake gap between each array element. For instance:

[source,js]
--------------------------------------------------
PUT my_index
{
  "mappings": {
    "groups": {
      "properties": {
        "names": {
          "type": "string",
          "position_offset_gap": 50 <1>
        }
      }
    }
  }
}

PUT /my_index/groups/1
{
  "names": [ "John Abraham", "Lincoln Smith"]
}

GET /my_index/groups/_search
{
  "query": {
    "match_phrase": {
      "names": "Abraham Lincoln" <2>
    }
  }
}
--------------------------------------------------
// AUTOSENSE
<1> The first term in the next array element will be 50 terms apart from the
last term in the previous array element.
<2> The phrase query no longer matches our document.
[[precision-step]]
=== `precision_step`

Most <<number,numeric>> datatypes index extra terms representing numeric
ranges for each number to make <<query-dsl-range-query,`range` queries>>
faster. For instance, this `range` query:

[source,js]
--------------------------------------------------
"range": {
  "number": {
    "gte": 0,
    "lte": 321
  }
}
--------------------------------------------------

might be executed internally as a <<query-dsl-terms-query,`terms` query>> that
looks something like this:

[source,js]
--------------------------------------------------
"terms": {
  "number": [
    "0-255",
    "256-319",
    "320",
    "321"
  ]
}
--------------------------------------------------

These extra terms greatly reduce the number of terms that have to be examined,
at the cost of increased disk space.

The default value for `precision_step` depends on the `type` of the numeric field:

[horizontal]
`long`, `double`, `date`, `ip`:: `16` (3 extra terms)
`integer`, `float`, `short`:: `8` (3 extra terms)
`byte`:: `2147483647` (0 extra terms)
`token_count`:: `32` (0 extra terms)

The value of the `precision_step` setting indicates the number of bits that
should be compressed into an extra term. A `long` value consists of 64 bits,
so a `precision_step` of 16 results in the following terms:

[horizontal]
Bits 0-15:: `value & 1111111111111111 0000000000000000 0000000000000000 0000000000000000`
Bits 0-31:: `value & 1111111111111111 1111111111111111 0000000000000000 0000000000000000`
Bits 0-47:: `value & 1111111111111111 1111111111111111 1111111111111111 0000000000000000`
Bits 0-63:: `value`
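
The default can be overridden per field when the mapping is created; a minimal
sketch, with an illustrative field name:

[source,js]
--------------------------------------------------
PUT my_index
{
  "mappings": {
    "my_type": {
      "properties": {
        "visits": {
          "type": "long",
          "precision_step": 8 <1>
        }
      }
    }
  }
}
--------------------------------------------------
// AUTOSENSE
<1> A smaller `precision_step` indexes more extra terms per value: faster range queries, at the cost of a bigger index.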
[[properties]]
=== `properties`

Type mappings, <<object,`object` fields>> and <<nested,`nested` fields>>
contain sub-fields, called `properties`. These properties may be of any
<<mapping-types,datatype>>, including `object` and `nested`. Properties can
be added:

* explicitly by defining them when <<indices-create-index,creating an index>>.
* explicitly by defining them when adding or updating a mapping type with the <<indices-put-mapping,PUT mapping>> API.
* <<dynamic-mapping,dynamically>> just by indexing documents containing new fields.

Below is an example of adding `properties` to a mapping type, an `object`
field, and a `nested` field:

[source,js]
--------------------------------------------------
PUT my_index
{
  "mappings": {
    "my_type": { <1>
      "properties": {
        "manager": { <2>
          "properties": {
            "age": { "type": "integer" },
            "name": { "type": "string" }
          }
        },
        "employees": { <3>
          "type": "nested",
          "properties": {
            "age": { "type": "integer" },
            "name": { "type": "string" }
          }
        }
      }
    }
  }
}

PUT my_index/my_type/1 <4>
{
  "region": "US",
  "manager": {
    "name": "Alice White",
    "age": 30
  },
  "employees": [
    {
      "name": "John Smith",
      "age": 34
    },
    {
      "name": "Peter Brown",
      "age": 26
    }
  ]
}
--------------------------------------------------
// AUTOSENSE
<1> Properties under the `my_type` mapping type.
<2> Properties under the `manager` object field.
<3> Properties under the `employees` nested field.
<4> An example document which corresponds to the above mapping.

==== Dot notation

Inner fields can be referred to in queries, aggregations, etc., using _dot
notation_:

[source,js]
--------------------------------------------------
GET my_index/_search
{
  "query": {
    "match": {
      "manager.name": "Alice White" <1>
    }
  },
  "aggs": {
    "Employees": {
      "nested": {
        "path": "employees"
      },
      "aggs": {
        "Employee Ages": {
          "histogram": {
            "field": "employees.age", <2>
            "interval": 5
          }
        }
      }
    }
  }
}
--------------------------------------------------
// AUTOSENSE

IMPORTANT: The full path to the inner field must be specified.
[[search-analyzer]]
=== `search_analyzer`

Usually, the same <<analyzer,analyzer>> should be applied at index time and at
search time, to ensure that the terms in the query are in the same format as
the terms in the inverted index.

Sometimes, though, it can make sense to use a different analyzer at search
time, such as when using the <<analysis-edgengram-tokenizer,`edge_ngram`>>
tokenizer for autocomplete.

By default, queries will use the `analyzer` defined in the field mapping, but
this can be overridden with the `search_analyzer` setting:

[source,js]
--------------------------------------------------
PUT /my_index
{
  "settings": {
    "analysis": {
      "filter": {
        "autocomplete_filter": {
          "type": "edge_ngram",
          "min_gram": 1,
          "max_gram": 20
        }
      },
      "analyzer": {
        "autocomplete": { <1>
          "type": "custom",
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "autocomplete_filter"
          ]
        }
      }
    }
  },
  "mappings": {
    "my_type": {
      "properties": {
        "text": {
          "type": "string",
          "analyzer": "autocomplete", <2>
          "search_analyzer": "standard" <2>
        }
      }
    }
  }
}

PUT my_index/my_type/1
{
  "text": "Quick Brown Fox" <3>
}

GET my_index/_search
{
  "query": {
    "match": {
      "text": {
        "query": "Quick Br", <4>
        "operator": "and"
      }
    }
  }
}
--------------------------------------------------
// AUTOSENSE

<1> Analysis settings to define the custom `autocomplete` analyzer.
<2> The `text` field uses the `autocomplete` analyzer at index time, but the `standard` analyzer at search time.
<3> This field is indexed as the terms: [ `q`, `qu`, `qui`, `quic`, `quick`, `b`, `br`, `bro`, `brow`, `brown`, `f`, `fo`, `fox` ]
<4> The query searches for both of these terms: [ `quick`, `br` ]

See {defguide}/_index_time_search_as_you_type.html[Index time search-as-you-type]
for a full explanation of this example.
[[similarity]]
=== `similarity`

Elasticsearch allows you to configure a scoring algorithm or _similarity_ per
field. The `similarity` setting provides a simple way of choosing a similarity
algorithm other than the default TF/IDF, such as `BM25`.

Similarities are mostly useful for <<string,`string`>> fields, especially
`analyzed` string fields, but can also apply to other field types.

Custom similarities can be configured by tuning the parameters of the built-in
similarities. For more details about these expert options, see the
<<index-modules-similarity,similarity module>>.

The only similarities which can be used out of the box, without any further
configuration, are:

`default`::
The default TF/IDF algorithm used by Elasticsearch and
Lucene. See {defguide}/practical-scoring-function.html[Lucene’s Practical Scoring Function]
for more information.

`BM25`::
The Okapi BM25 algorithm.
See {defguide}/pluggable-similarites.html[Pluggable Similarity Algorithms]
for more information.

The `similarity` can be set on the field level when a field is first created,
as follows:

[source,js]
--------------------------------------------------
PUT my_index
{
  "mappings": {
    "my_type": {
      "properties": {
        "default_field": { <1>
          "type": "string"
        },
        "bm25_field": {
          "type": "string",
          "similarity": "BM25" <2>
        }
      }
    }
  }
}
--------------------------------------------------
// AUTOSENSE
<1> The `default_field` uses the `default` similarity (i.e. TF/IDF).
<2> The `bm25_field` uses the `BM25` similarity.
[[mapping-store]]
=== `store`

By default, field values are <<mapping-index,indexed>> to make them searchable,
but they are not _stored_. This means that the field can be queried, but the
original field value cannot be retrieved.

Usually this doesn't matter. The field value is already part of the
<<mapping-source-field,`_source` field>>, which is stored by default. If you
only want to retrieve the value of a single field or of a few fields, instead
of the whole `_source`, then this can be achieved with
<<search-request-source-filtering,source filtering>>.
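
As a quick sketch of that alternative, a search request can ask for just part
of the `_source` (the field names anticipate the example below):

[source,js]
--------------------------------------------------
GET my_index/_search
{
  "_source": [ "title", "date" ], <1>
  "query": {
    "match_all": {}
  }
}
--------------------------------------------------
// AUTOSENSE
<1> Only the `title` and `date` parts of each document's `_source` are returned; nothing extra needs to be stored.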
In certain situations it can make sense to `store` a field. For instance, if
you have a document with a `title`, a `date`, and a very large `content`
field, you may want to retrieve just the `title` and the `date` without having
to extract those fields from a large `_source` field:

[source,js]
--------------------------------------------------
PUT /my_index
{
  "mappings": {
    "my_type": {
      "properties": {
        "title": {
          "type": "string",
          "store": true <1>
        },
        "date": {
          "type": "date",
          "store": true <1>
        },
        "content": {
          "type": "string"
        }
      }
    }
  }
}

PUT /my_index/my_type/1
{
  "title": "Some short title",
  "date": "2015-01-01",
  "content": "A very long content field..."
}

GET my_index/_search
{
  "fields": [ "title", "date" ] <2>
}
--------------------------------------------------
// AUTOSENSE
<1> The `title` and `date` fields are stored.
<2> This request will retrieve the values of the `title` and `date` fields.

[NOTE]
.Stored fields returned as arrays
======================================

For consistency, stored fields are always returned as an _array_ because there
is no way of knowing if the original field value was a single value, multiple
values, or an empty array.

If you need the original value, you should retrieve it from the `_source`
field instead.

======================================

Another situation where storing a field makes sense is for fields that don't
appear in the `_source` field (such as <<copy-to,`copy_to` fields>>).