Merge branch 'master' into feature/query-refactoring

Conflicts:
	core/src/main/java/org/elasticsearch/index/query/GeoShapeQueryBuilder.java
	core/src/main/java/org/elasticsearch/index/query/TermsQueryBuilder.java
javanna 2015-06-23 10:16:21 +02:00 committed by Luca Cavanna
commit 99147228d7
440 changed files with 1929 additions and 2018 deletions

View File

@ -281,7 +281,7 @@
</includes>
</testResource>
<testResource>
<directory>${project.basedir}/../rest-api-spec</directory>
<directory>${elasticsearch.tools.directory}/rest-api-spec</directory>
<targetPath>rest-api-spec</targetPath>
<includes>
<include>api/*.json</include>

View File

@ -27,8 +27,10 @@ import org.elasticsearch.cluster.ClusterState;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.cluster.routing.IndexRoutingTable;
import org.elasticsearch.cluster.routing.RoutingTableValidation;
import org.elasticsearch.cluster.routing.ShardRouting;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.common.xcontent.ToXContent;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentBuilderString;
@ -60,6 +62,8 @@ public class ClusterHealthResponse extends ActionResponse implements Iterable<Cl
int numberOfPendingTasks = 0;
int numberOfInFlightFetch = 0;
int delayedUnassignedShards = 0;
TimeValue taskMaxWaitingTime = TimeValue.timeValueMillis(0);
double activeShardsPercent = 100;
boolean timedOut = false;
ClusterHealthStatus status = ClusterHealthStatus.RED;
private List<String> validationFailures;
@ -70,15 +74,19 @@ public class ClusterHealthResponse extends ActionResponse implements Iterable<Cl
/** needed for plugins BWC */
public ClusterHealthResponse(String clusterName, String[] concreteIndices, ClusterState clusterState) {
this(clusterName, concreteIndices, clusterState, -1, -1, -1);
this(clusterName, concreteIndices, clusterState, -1, -1, -1, TimeValue.timeValueHours(0));
}
public ClusterHealthResponse(String clusterName, String[] concreteIndices, ClusterState clusterState, int numberOfPendingTasks,
int numberOfInFlightFetch, int delayedUnassignedShards) {
int numberOfInFlightFetch, int delayedUnassignedShards, TimeValue taskMaxWaitingTime) {
this.clusterName = clusterName;
this.numberOfPendingTasks = numberOfPendingTasks;
this.numberOfInFlightFetch = numberOfInFlightFetch;
this.delayedUnassignedShards = delayedUnassignedShards;
this.clusterName = clusterName;
this.numberOfPendingTasks = numberOfPendingTasks;
this.numberOfInFlightFetch = numberOfInFlightFetch;
this.taskMaxWaitingTime = taskMaxWaitingTime;
RoutingTableValidation validation = clusterState.routingTable().validate(clusterState.metaData());
validationFailures = validation.failures();
numberOfNodes = clusterState.nodes().size();
@ -116,6 +124,20 @@ public class ClusterHealthResponse extends ActionResponse implements Iterable<Cl
} else if (clusterState.blocks().hasGlobalBlock(RestStatus.SERVICE_UNAVAILABLE)) {
status = ClusterHealthStatus.RED;
}
// shortcut on green
if (status.equals(ClusterHealthStatus.GREEN)) {
this.activeShardsPercent = 100;
} else {
List<ShardRouting> shardRoutings = clusterState.getRoutingTable().allShards();
int activeShardCount = 0;
int totalShardCount = 0;
for (ShardRouting shardRouting : shardRoutings) {
if (shardRouting.active()) activeShardCount++;
totalShardCount++;
}
this.activeShardsPercent = (((double) activeShardCount) / totalShardCount) * 100;
}
}
public String getClusterName() {
@ -200,6 +222,21 @@ public class ClusterHealthResponse extends ActionResponse implements Iterable<Cl
return indices;
}
/**
*
* @return The maximum wait time of all tasks in the queue
*/
public TimeValue getTaskMaxWaitingTime() {
return taskMaxWaitingTime;
}
/**
* The percentage of active shards; should be 100% on a green cluster
*/
public double getActiveShardsPercent() {
return activeShardsPercent;
}
@Override
public Iterator<ClusterIndexHealth> iterator() {
return indices.values().iterator();
@ -244,6 +281,9 @@ public class ClusterHealthResponse extends ActionResponse implements Iterable<Cl
if (in.getVersion().onOrAfter(Version.V_1_7_0)) {
delayedUnassignedShards = in.readInt();
}
activeShardsPercent = in.readDouble();
taskMaxWaitingTime = TimeValue.readTimeValue(in);
}
@Override
@ -274,6 +314,8 @@ public class ClusterHealthResponse extends ActionResponse implements Iterable<Cl
if (out.getVersion().onOrAfter(Version.V_1_7_0)) {
out.writeInt(delayedUnassignedShards);
}
out.writeDouble(activeShardsPercent);
taskMaxWaitingTime.writeTo(out);
}
@ -299,6 +341,10 @@ public class ClusterHealthResponse extends ActionResponse implements Iterable<Cl
static final XContentBuilderString NUMBER_OF_PENDING_TASKS = new XContentBuilderString("number_of_pending_tasks");
static final XContentBuilderString NUMBER_OF_IN_FLIGHT_FETCH = new XContentBuilderString("number_of_in_flight_fetch");
static final XContentBuilderString DELAYED_UNASSIGNED_SHARDS = new XContentBuilderString("delayed_unassigned_shards");
static final XContentBuilderString TASK_MAX_WAIT_TIME_IN_QUEUE = new XContentBuilderString("task_max_waiting_in_queue");
static final XContentBuilderString TASK_MAX_WAIT_TIME_IN_QUEUE_IN_MILLIS = new XContentBuilderString("task_max_waiting_in_queue_millis");
static final XContentBuilderString ACTIVE_SHARDS_PERCENT_AS_NUMBER = new XContentBuilderString("active_shards_percent_as_number");
static final XContentBuilderString ACTIVE_SHARDS_PERCENT = new XContentBuilderString("active_shards_percent");
static final XContentBuilderString ACTIVE_PRIMARY_SHARDS = new XContentBuilderString("active_primary_shards");
static final XContentBuilderString ACTIVE_SHARDS = new XContentBuilderString("active_shards");
static final XContentBuilderString RELOCATING_SHARDS = new XContentBuilderString("relocating_shards");
@ -323,6 +369,8 @@ public class ClusterHealthResponse extends ActionResponse implements Iterable<Cl
builder.field(Fields.DELAYED_UNASSIGNED_SHARDS, getDelayedUnassignedShards());
builder.field(Fields.NUMBER_OF_PENDING_TASKS, getNumberOfPendingTasks());
builder.field(Fields.NUMBER_OF_IN_FLIGHT_FETCH, getNumberOfInFlightFetch());
builder.timeValueField(Fields.TASK_MAX_WAIT_TIME_IN_QUEUE_IN_MILLIS, Fields.TASK_MAX_WAIT_TIME_IN_QUEUE, getTaskMaxWaitingTime());
builder.percentageField(Fields.ACTIVE_SHARDS_PERCENT_AS_NUMBER, Fields.ACTIVE_SHARDS_PERCENT, getActiveShardsPercent());
String level = params.param("level", "cluster");
boolean outputIndices = "indices".equals(level) || "shards".equals(level);

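Taken together, the two additions surface as new getters on the health response. A minimal usage sketch, not part of this diff (the client variable and surrounding setup are assumed):

ClusterHealthResponse health = client.admin().cluster().prepareHealth().get();
TimeValue maxWait = health.getTaskMaxWaitingTime(); // zero when the pending-task queue is empty
double activePct = health.getActiveShardsPercent(); // 100.0 on a green cluster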
View File

@ -25,9 +25,6 @@ import org.elasticsearch.action.support.IndicesOptions;
import org.elasticsearch.action.support.master.TransportMasterNodeReadAction;
import org.elasticsearch.cluster.*;
import org.elasticsearch.cluster.block.ClusterBlockException;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.cluster.routing.ShardRouting;
import org.elasticsearch.cluster.routing.ShardRoutingState;
import org.elasticsearch.cluster.routing.UnassignedInfo;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.inject.Inject;
@ -170,12 +167,14 @@ public class TransportClusterHealthAction extends TransportMasterNodeReadAction<
}
private boolean validateRequest(final ClusterHealthRequest request, ClusterState clusterState, final int waitFor) {
ClusterHealthResponse response = clusterHealth(request, clusterState, clusterService.numberOfPendingTasks(), gatewayAllocator.getNumberOfInFlightFetch());
ClusterHealthResponse response = clusterHealth(request, clusterState, clusterService.numberOfPendingTasks(),
gatewayAllocator.getNumberOfInFlightFetch(), clusterService.getMaxTaskWaitTime());
return prepareResponse(request, response, clusterState, waitFor);
}
private ClusterHealthResponse getResponse(final ClusterHealthRequest request, ClusterState clusterState, final int waitFor, boolean timedOut) {
ClusterHealthResponse response = clusterHealth(request, clusterState, clusterService.numberOfPendingTasks(), gatewayAllocator.getNumberOfInFlightFetch());
ClusterHealthResponse response = clusterHealth(request, clusterState, clusterService.numberOfPendingTasks(),
gatewayAllocator.getNumberOfInFlightFetch(), clusterService.getMaxTaskWaitTime());
boolean valid = prepareResponse(request, response, clusterState, waitFor);
assert valid || timedOut;
// we check for a timeout here since this method might be called from the wait_for_events
@ -259,20 +258,25 @@ public class TransportClusterHealthAction extends TransportMasterNodeReadAction<
}
private ClusterHealthResponse clusterHealth(ClusterHealthRequest request, ClusterState clusterState, int numberOfPendingTasks, int numberOfInFlightFetch) {
private ClusterHealthResponse clusterHealth(ClusterHealthRequest request, ClusterState clusterState, int numberOfPendingTasks, int numberOfInFlightFetch,
TimeValue pendingTaskTimeInQueue) {
if (logger.isTraceEnabled()) {
logger.trace("Calculating health based on state version [{}]", clusterState.version());
}
String[] concreteIndices;
try {
concreteIndices = clusterState.metaData().concreteIndices(request.indicesOptions(), request.indices());
} catch (IndexMissingException e) {
// one of the specified indices is not there - treat it as RED.
ClusterHealthResponse response = new ClusterHealthResponse(clusterName.value(), Strings.EMPTY_ARRAY, clusterState, numberOfPendingTasks, numberOfInFlightFetch, UnassignedInfo.getNumberOfDelayedUnassigned(settings, clusterState));
ClusterHealthResponse response = new ClusterHealthResponse(clusterName.value(), Strings.EMPTY_ARRAY, clusterState,
numberOfPendingTasks, numberOfInFlightFetch, UnassignedInfo.getNumberOfDelayedUnassigned(settings, clusterState),
pendingTaskTimeInQueue);
response.status = ClusterHealthStatus.RED;
return response;
}
return new ClusterHealthResponse(clusterName.value(), concreteIndices, clusterState, numberOfPendingTasks, numberOfInFlightFetch, UnassignedInfo.getNumberOfDelayedUnassigned(settings, clusterState));
return new ClusterHealthResponse(clusterName.value(), concreteIndices, clusterState, numberOfPendingTasks,
numberOfInFlightFetch, UnassignedInfo.getNumberOfDelayedUnassigned(settings, clusterState), pendingTaskTimeInQueue);
}
}

View File

@ -303,10 +303,12 @@ public class ClusterStatsNodes implements ToXContent, Streamable {
int availableProcessors;
long availableMemory;
ObjectIntHashMap<OsInfo.Cpu> cpus;
final ObjectIntHashMap<String> names;
final ObjectIntHashMap<OsInfo.Cpu> cpus;
public OsStats() {
cpus = new ObjectIntHashMap<>();
names = new ObjectIntHashMap<>();
}
public void addNodeInfo(NodeInfo nodeInfo) {
@ -314,6 +316,9 @@ public class ClusterStatsNodes implements ToXContent, Streamable {
if (nodeInfo.getOs() == null) {
return;
}
if (nodeInfo.getOs().getName() != null) {
names.addTo(nodeInfo.getOs().getName(), 1);
}
if (nodeInfo.getOs().cpu() != null) {
cpus.addTo(nodeInfo.getOs().cpu(), 1);
}
@ -339,8 +344,13 @@ public class ClusterStatsNodes implements ToXContent, Streamable {
availableProcessors = in.readVInt();
availableMemory = in.readLong();
int size = in.readVInt();
cpus = new ObjectIntHashMap<>(size);
for (; size > 0; size--) {
names.clear();
for (int i = 0; i < size; i++) {
names.addTo(in.readString(), in.readVInt());
}
size = in.readVInt();
cpus.clear();
for (int i = 0; i < size; i++) {
cpus.addTo(OsInfo.Cpu.readCpu(in), in.readVInt());
}
}
@ -349,12 +359,16 @@ public class ClusterStatsNodes implements ToXContent, Streamable {
public void writeTo(StreamOutput out) throws IOException {
out.writeVInt(availableProcessors);
out.writeLong(availableMemory);
out.writeVInt(names.size());
for (ObjectIntCursor<String> name : names) {
out.writeString(name.key);
out.writeVInt(name.value);
}
out.writeVInt(cpus.size());
for (ObjectIntCursor<OsInfo.Cpu> c : cpus) {
c.key.writeTo(out);
out.writeVInt(c.value);
}
}
public static OsStats readOsStats(StreamInput in) throws IOException {
@ -365,6 +379,8 @@ public class ClusterStatsNodes implements ToXContent, Streamable {
static final class Fields {
static final XContentBuilderString AVAILABLE_PROCESSORS = new XContentBuilderString("available_processors");
static final XContentBuilderString NAME = new XContentBuilderString("name");
static final XContentBuilderString NAMES = new XContentBuilderString("names");
static final XContentBuilderString MEM = new XContentBuilderString("mem");
static final XContentBuilderString TOTAL = new XContentBuilderString("total");
static final XContentBuilderString TOTAL_IN_BYTES = new XContentBuilderString("total_in_bytes");
@ -379,6 +395,15 @@ public class ClusterStatsNodes implements ToXContent, Streamable {
builder.byteSizeField(Fields.TOTAL_IN_BYTES, Fields.TOTAL, availableMemory);
builder.endObject();
builder.startArray(Fields.NAMES);
for (ObjectIntCursor<String> name : names) {
builder.startObject();
builder.field(Fields.NAME, name.key);
builder.field(Fields.COUNT, name.value);
builder.endObject();
}
builder.endArray();
builder.startArray(Fields.CPU);
for (ObjectIntCursor<OsInfo.Cpu> cpu : cpus) {
builder.startObject();

View File

@ -133,7 +133,6 @@ public class TransportFieldStatsTransportAction extends TransportBroadcastAction
IndexService indexServices = indicesService.indexServiceSafe(shardId.getIndex());
MapperService mapperService = indexServices.mapperService();
IndexShard shard = indexServices.shardSafe(shardId.id());
shard.readAllowed();
try (Engine.Searcher searcher = shard.acquireSearcher("fieldstats")) {
for (String field : request.getFields()) {
MappedFieldType fieldType = mapperService.fullName(field);

View File

@ -120,4 +120,10 @@ public interface ClusterService extends LifecycleComponent<ClusterService> {
*/
int numberOfPendingTasks();
/**
* Returns the maximum wait time for tasks in the queue
*
* @return A zero time value if the queue is empty, otherwise the wait time of the oldest task in the queue
*/
TimeValue getMaxTaskWaitTime();
}

View File

@ -162,43 +162,6 @@ public class AllocationService extends AbstractComponent {
return new RoutingAllocation.Result(true, new RoutingTable.Builder().updateNodes(routingNodes).build().validateRaiseException(clusterState.metaData()));
}
/**
* Only handles reroute but *without* any reassignment of unassigned shards or rebalancing. It
* makes sure to handle removed nodes, but only moves their shards to UNASSIGNED; it does not
* reassign them.
*/
public RoutingAllocation.Result rerouteWithNoReassign(ClusterState clusterState) {
return rerouteWithNoReassign(clusterState, false);
}
/**
* Only handles reroute but *without* any reassignment of unassigned shards or rebalancing. It
* makes sure to handle removed nodes, but only moves their shards to UNASSIGNED; it does not
* reassign them.
*/
public RoutingAllocation.Result rerouteWithNoReassign(ClusterState clusterState, boolean debug) {
RoutingNodes routingNodes = clusterState.routingNodes();
// shuffle the unassigned nodes, just so we won't have things like poison failed shards
routingNodes.unassigned().shuffle();
RoutingAllocation allocation = new RoutingAllocation(allocationDeciders, routingNodes, clusterState.nodes(), clusterInfoService.getClusterInfo());
allocation.debugDecision(debug);
boolean changed = false;
// first, clear from the shards any node id they used to belong to that is now dead
changed |= deassociateDeadNodes(allocation);
// create a sorted list of from nodes with least number of shards to the maximum ones
applyNewNodes(allocation);
// elect primaries *before* allocating unassigned, so backups of primaries that failed
// will be moved to primary state and not wait for primaries to be allocated and recovered (*from gateway*)
changed |= electPrimariesAndUnassignedDanglingReplicas(allocation);
if (!changed) {
return new RoutingAllocation.Result(false, clusterState.routingTable());
}
return new RoutingAllocation.Result(true, new RoutingTable.Builder().updateNodes(routingNodes).build().validateRaiseException(clusterState.metaData()));
}
private boolean reroute(RoutingAllocation allocation) {
boolean changed = false;
// first, clear from the shards any node id they used to belong to that is now dead

View File

@ -243,7 +243,7 @@ public class InternalClusterService extends AbstractLifecycleComponent<ClusterSe
}
// call the post added notification on the same event thread
try {
updateTasksExecutor.execute(new TimedPrioritizedRunnable(Priority.HIGH, "_add_listener_") {
updateTasksExecutor.execute(new SourcePrioritizedRunnable(Priority.HIGH, "_add_listener_") {
@Override
public void run() {
if (timeout != null) {
@ -312,12 +312,12 @@ public class InternalClusterService extends AbstractLifecycleComponent<ClusterSe
final Object task = pending.task;
if (task == null) {
continue;
} else if (task instanceof TimedPrioritizedRunnable) {
TimedPrioritizedRunnable runnable = (TimedPrioritizedRunnable) task;
} else if (task instanceof SourcePrioritizedRunnable) {
SourcePrioritizedRunnable runnable = (SourcePrioritizedRunnable) task;
source = runnable.source();
timeInQueue = runnable.timeSinceCreatedInMillis();
timeInQueue = runnable.getAgeInMillis();
} else {
assert false : "expected TimedPrioritizedRunnable got " + task.getClass();
assert false : "expected SourcePrioritizedRunnable got " + task.getClass();
source = "unknown [" + task.getClass() + "]";
timeInQueue = 0;
}
@ -332,21 +332,17 @@ public class InternalClusterService extends AbstractLifecycleComponent<ClusterSe
return updateTasksExecutor.getNumberOfPendingTasks();
}
@Override
public TimeValue getMaxTaskWaitTime() {
return updateTasksExecutor.getMaxTaskWaitTime();
}
static abstract class TimedPrioritizedRunnable extends PrioritizedRunnable {
private final long creationTimeNS;
static abstract class SourcePrioritizedRunnable extends PrioritizedRunnable {
protected final String source;
protected TimedPrioritizedRunnable(Priority priority, String source) {
public SourcePrioritizedRunnable(Priority priority, String source) {
super(priority);
this.source = source;
this.creationTimeNS = System.nanoTime();
}
public long timeSinceCreatedInMillis() {
// max with 0 to make sure we always return a non negative number
// even if time shifts.
return Math.max(0, TimeValue.nsecToMSec(System.nanoTime() - creationTimeNS));
}
public String source() {
@ -354,11 +350,10 @@ public class InternalClusterService extends AbstractLifecycleComponent<ClusterSe
}
}
class UpdateTask extends TimedPrioritizedRunnable {
class UpdateTask extends SourcePrioritizedRunnable {
public final ClusterStateUpdateTask updateTask;
UpdateTask(String source, Priority priority, ClusterStateUpdateTask updateTask) {
super(priority, source);
this.updateTask = updateTask;

View File

@ -26,6 +26,8 @@ import org.joda.time.field.OffsetDateTimeField;
import org.joda.time.field.ScaledDurationField;
import org.joda.time.format.*;
import java.io.IOException;
import java.io.Writer;
import java.util.Locale;
import java.util.regex.Pattern;
@ -135,9 +137,9 @@ public class Joda {
} else if ("yearMonthDay".equals(input) || "year_month_day".equals(input)) {
formatter = ISODateTimeFormat.yearMonthDay();
} else if ("epoch_second".equals(input)) {
formatter = new DateTimeFormatterBuilder().append(new EpochTimeParser(false)).toFormatter();
formatter = new DateTimeFormatterBuilder().append(new EpochTimePrinter(false), new EpochTimeParser(false)).toFormatter();
} else if ("epoch_millis".equals(input)) {
formatter = new DateTimeFormatterBuilder().append(new EpochTimeParser(true)).toFormatter();
formatter = new DateTimeFormatterBuilder().append(new EpochTimePrinter(true), new EpochTimeParser(true)).toFormatter();
} else if (Strings.hasLength(input) && input.contains("||")) {
String[] formats = Strings.delimitedListToStringArray(input, "||");
DateTimeParser[] parsers = new DateTimeParser[formats.length];
@ -200,8 +202,8 @@ public class Joda {
public static class EpochTimeParser implements DateTimeParser {
private static final Pattern MILLI_SECOND_PRECISION_PATTERN = Pattern.compile("^\\d{1,13}$");
private static final Pattern SECOND_PRECISION_PATTERN = Pattern.compile("^\\d{1,10}$");
private static final Pattern MILLI_SECOND_PRECISION_PATTERN = Pattern.compile("^-?\\d{1,13}$");
private static final Pattern SECOND_PRECISION_PATTERN = Pattern.compile("^-?\\d{1,10}$");
private final boolean hasMilliSecondPrecision;
private final Pattern pattern;
@ -218,7 +220,10 @@ public class Joda {
@Override
public int parseInto(DateTimeParserBucket bucket, String text, int position) {
if (text.length() > estimateParsedLength() ||
boolean isPositive = text.startsWith("-") == false;
boolean isTooLong = text.length() > estimateParsedLength();
if ((isPositive && isTooLong) ||
// timestamps have to have UTC timezone
bucket.getZone() != DateTimeZone.UTC ||
pattern.matcher(text).matches() == false) {
@ -242,5 +247,66 @@ public class Joda {
}
return text.length();
}
};
}
public static class EpochTimePrinter implements DateTimePrinter {
private boolean hasMilliSecondPrecision;
public EpochTimePrinter(boolean hasMilliSecondPrecision) {
this.hasMilliSecondPrecision = hasMilliSecondPrecision;
}
@Override
public int estimatePrintedLength() {
return hasMilliSecondPrecision ? 13 : 10;
}
@Override
public void printTo(StringBuffer buf, long instant, Chronology chrono, int displayOffset, DateTimeZone displayZone, Locale locale) {
if (hasMilliSecondPrecision) {
buf.append(instant);
} else {
buf.append(instant / 1000);
}
}
@Override
public void printTo(Writer out, long instant, Chronology chrono, int displayOffset, DateTimeZone displayZone, Locale locale) throws IOException {
if (hasMilliSecondPrecision) {
out.write(String.valueOf(instant));
} else {
out.append(String.valueOf(instant / 1000));
}
}
@Override
public void printTo(StringBuffer buf, ReadablePartial partial, Locale locale) {
if (hasMilliSecondPrecision) {
buf.append(String.valueOf(getDateTimeMillis(partial)));
} else {
buf.append(String.valueOf(getDateTimeMillis(partial) / 1000));
}
}
@Override
public void printTo(Writer out, ReadablePartial partial, Locale locale) throws IOException {
if (hasMilliSecondPrecision) {
out.append(String.valueOf(getDateTimeMillis(partial)));
} else {
out.append(String.valueOf(getDateTimeMillis(partial) / 1000));
}
}
private long getDateTimeMillis(ReadablePartial partial) {
int year = partial.get(DateTimeFieldType.year());
int monthOfYear = partial.get(DateTimeFieldType.monthOfYear());
int dayOfMonth = partial.get(DateTimeFieldType.dayOfMonth());
int hourOfDay = partial.get(DateTimeFieldType.hourOfDay());
int minuteOfHour = partial.get(DateTimeFieldType.minuteOfHour());
int secondOfMinute = partial.get(DateTimeFieldType.secondOfMinute());
int millisOfSecond = partial.get(DateTimeFieldType.millisOfSecond());
return partial.getChronology().getDateTimeMillis(year, monthOfYear, dayOfMonth, hourOfDay, minuteOfHour, secondOfMinute, millisOfSecond);
}
}
}
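With a printer registered next to each parser, the epoch formats become symmetric: they can print as well as parse. A hedged round-trip sketch (assumes the Joda imports already present in this file plus org.joda.time.DateTime):

FormatDateTimeFormatter fmt = Joda.forPattern("epoch_millis");
long millis = fmt.parser().parseDateTime("1234567890123").getMillis(); // 1234567890123L
String printed = fmt.printer().print(new DateTime(millis, DateTimeZone.UTC)); // "1234567890123"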

View File

@ -20,7 +20,10 @@
package org.elasticsearch.common.util.concurrent;
import java.util.concurrent.*;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
/**
* An extension to thread pool executor, allowing specific additional stats to be added to it in the future.
@ -67,8 +70,8 @@ public class EsThreadPoolExecutor extends ThreadPoolExecutor {
}
}
public static interface ShutdownListener {
public void onTerminated();
public interface ShutdownListener {
void onTerminated();
}
@Override

View File

@ -36,6 +36,7 @@ import java.util.concurrent.atomic.AtomicLong;
*/
public class PrioritizedEsThreadPoolExecutor extends EsThreadPoolExecutor {
private static final TimeValue NO_WAIT_TIME_VALUE = TimeValue.timeValueMillis(0);
private AtomicLong insertionOrder = new AtomicLong();
private Queue<Runnable> current = ConcurrentCollections.newQueue();
@ -56,6 +57,26 @@ public class PrioritizedEsThreadPoolExecutor extends EsThreadPoolExecutor {
return size;
}
/**
* Returns the waiting time of the oldest task in the queue
*/
public TimeValue getMaxTaskWaitTime() {
if (getQueue().size() == 0) {
return NO_WAIT_TIME_VALUE;
}
long now = System.nanoTime();
long oldestCreationDateInNanos = now;
for (Runnable queuedRunnable : getQueue()) {
if (queuedRunnable instanceof PrioritizedRunnable) {
oldestCreationDateInNanos = Math.min(oldestCreationDateInNanos,
((PrioritizedRunnable) queuedRunnable).getCreationDateInNanos());
}
}
return TimeValue.timeValueNanos(now - oldestCreationDateInNanos);
}
private void addPending(List<Runnable> runnables, List<Pending> pending, boolean executing) {
for (Runnable runnable : runnables) {
if (runnable instanceof TieBreakingPrioritizedRunnable) {
@ -191,7 +212,6 @@ public class PrioritizedEsThreadPoolExecutor extends EsThreadPoolExecutor {
timeoutFuture = null;
}
}
}
private final class PrioritizedFutureTask<T> extends FutureTask<T> implements Comparable<PrioritizedFutureTask> {

View File

@ -19,6 +19,7 @@
package org.elasticsearch.common.util.concurrent;
import org.elasticsearch.common.Priority;
import org.elasticsearch.common.unit.TimeValue;
/**
*
@ -26,6 +27,7 @@ import org.elasticsearch.common.Priority;
public abstract class PrioritizedRunnable implements Runnable, Comparable<PrioritizedRunnable> {
private final Priority priority;
private final long creationDate;
public static PrioritizedRunnable wrap(Runnable runnable, Priority priority) {
return new Wrapped(runnable, priority);
@ -33,6 +35,15 @@ public abstract class PrioritizedRunnable implements Runnable, Comparable<Priori
protected PrioritizedRunnable(Priority priority) {
this.priority = priority;
creationDate = System.nanoTime();
}
public long getCreationDateInNanos() {
return creationDate;
}
public long getAgeInMillis() {
return Math.max(0, TimeValue.nsecToMSec(System.nanoTime() - creationDate)); // nanos to millis; floored at 0 in case of clock shifts
}
@Override

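The creation timestamp recorded in the constructor is what getAgeInMillis() and, downstream, the executor's getMaxTaskWaitTime() rely on. A minimal sketch (the no-op Runnable is illustrative only):

PrioritizedRunnable task = PrioritizedRunnable.wrap(new Runnable() {
    @Override
    public void run() { /* no-op */ }
}, Priority.NORMAL);
long ageMillis = task.getAgeInMillis(); // >= 0, measured from the wrap() call above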
View File

@ -43,6 +43,7 @@ import java.math.BigDecimal;
import java.math.RoundingMode;
import java.util.Calendar;
import java.util.Date;
import java.util.Locale;
import java.util.Map;
/**
@ -957,6 +958,14 @@ public final class XContentBuilder implements BytesStream, Releasable {
return this;
}
public XContentBuilder percentageField(XContentBuilderString rawFieldName, XContentBuilderString readableFieldName, double percentage) throws IOException {
if (humanReadable) {
field(readableFieldName, String.format(Locale.ROOT, "%1.1f%%", percentage));
}
field(rawFieldName, percentage);
return this;
}
public XContentBuilder value(Boolean value) throws IOException {
if (value == null) {
return nullValue();

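percentageField() pairs a human-readable rendering with the raw number, mirroring the existing timeValueField() pattern. A hedged usage sketch (field names borrowed from the cluster health change; builder setup assumed):

XContentBuilder builder = XContentFactory.jsonBuilder().humanReadable(true);
builder.startObject();
builder.percentageField(new XContentBuilderString("active_shards_percent_as_number"),
        new XContentBuilderString("active_shards_percent"), 87.5);
builder.endObject();
// yields {"active_shards_percent":"87.5%","active_shards_percent_as_number":87.5}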
View File

@ -161,7 +161,9 @@ public class TimestampFieldMapper extends DateFieldMapper implements RootMapper
@Override
public Mapper.Builder parse(String name, Map<String, Object> node, ParserContext parserContext) throws MapperParsingException {
TimestampFieldMapper.Builder builder = timestamp();
parseField(builder, builder.name, node, parserContext);
if (parserContext.indexVersionCreated().before(Version.V_2_0_0)) {
parseField(builder, builder.name, node, parserContext);
}
boolean defaultSet = false;
Boolean ignoreMissing = null;
for (Iterator<Map.Entry<String, Object>> iterator = node.entrySet().iterator(); iterator.hasNext();) {
@ -172,7 +174,7 @@ public class TimestampFieldMapper extends DateFieldMapper implements RootMapper
EnabledAttributeMapper enabledState = nodeBooleanValue(fieldNode) ? EnabledAttributeMapper.ENABLED : EnabledAttributeMapper.DISABLED;
builder.enabled(enabledState);
iterator.remove();
} else if (fieldName.equals("path")) {
} else if (fieldName.equals("path") && parserContext.indexVersionCreated().before(Version.V_2_0_0)) {
builder.path(fieldNode.toString());
iterator.remove();
} else if (fieldName.equals("format")) {
@ -265,11 +267,6 @@ public class TimestampFieldMapper extends DateFieldMapper implements RootMapper
return defaultFieldType;
}
@Override
public boolean defaultDocValues() {
return false;
}
public boolean enabled() {
return this.enabledState.enabled;
}
@ -340,14 +337,16 @@ public class TimestampFieldMapper extends DateFieldMapper implements RootMapper
if (includeDefaults || enabledState != Defaults.ENABLED) {
builder.field("enabled", enabledState.enabled);
}
if (includeDefaults || (indexed != indexedDefault) || (fieldType().tokenized() != Defaults.FIELD_TYPE.tokenized())) {
if (indexCreatedBefore2x && (includeDefaults || (indexed != indexedDefault) || (fieldType().tokenized() != Defaults.FIELD_TYPE.tokenized()))) {
builder.field("index", indexTokenizeOptionToString(indexed, fieldType().tokenized()));
}
if (includeDefaults || fieldType().stored() != Defaults.FIELD_TYPE.stored()) {
if (indexCreatedBefore2x && (includeDefaults || fieldType().stored() != Defaults.PRE_20_FIELD_TYPE.stored())) {
builder.field("store", fieldType().stored());
}
doXContentDocValues(builder, includeDefaults);
if (includeDefaults || path != Defaults.PATH) {
if (indexCreatedBefore2x) {
doXContentDocValues(builder, includeDefaults);
}
if (indexCreatedBefore2x && (includeDefaults || path != Defaults.PATH)) {
builder.field("path", path);
}
if (includeDefaults || !fieldType().dateTimeFormatter().format().equals(Defaults.DATE_TIME_FORMATTER.format())) {
@ -359,10 +358,12 @@ public class TimestampFieldMapper extends DateFieldMapper implements RootMapper
if (includeDefaults || ignoreMissing != null) {
builder.field("ignore_missing", ignoreMissing);
}
if (customFieldDataSettings != null) {
builder.field("fielddata", (Map) customFieldDataSettings.getAsMap());
} else if (includeDefaults) {
builder.field("fielddata", (Map) fieldType().fieldDataType().getSettings().getAsMap());
if (indexCreatedBefore2x) {
if (customFieldDataSettings != null) {
builder.field("fielddata", (Map) customFieldDataSettings.getAsMap());
} else if (includeDefaults) {
builder.field("fielddata", (Map) fieldType().fieldDataType().getSettings().getAsMap());
}
}
builder.endObject();

View File

@ -74,7 +74,6 @@ public class PercolatorQueriesRegistry extends AbstractIndexShardComponent imple
private final IndexQueryParserService queryParserService;
private final MapperService mapperService;
private final IndicesLifecycle indicesLifecycle;
private final IndexCache indexCache;
private final IndexFieldDataService indexFieldDataService;
private final ShardIndexingService indexingService;
@ -98,13 +97,12 @@ public class PercolatorQueriesRegistry extends AbstractIndexShardComponent imple
@Inject
public PercolatorQueriesRegistry(ShardId shardId, @IndexSettings Settings indexSettings, IndexQueryParserService queryParserService,
ShardIndexingService indexingService, IndicesLifecycle indicesLifecycle, MapperService mapperService,
IndexCache indexCache, IndexFieldDataService indexFieldDataService, ShardPercolateService shardPercolateService) {
IndexFieldDataService indexFieldDataService, ShardPercolateService shardPercolateService) {
super(shardId, indexSettings);
this.queryParserService = queryParserService;
this.mapperService = mapperService;
this.indicesLifecycle = indicesLifecycle;
this.indexingService = indexingService;
this.indexCache = indexCache;
this.indexFieldDataService = indexFieldDataService;
this.shardPercolateService = shardPercolateService;
this.mapUnmappedFieldsAsString = indexSettings.getAsBoolean(MAP_UNMAPPED_FIELDS_AS_STRING, false);
@ -259,7 +257,7 @@ public class PercolatorQueriesRegistry extends AbstractIndexShardComponent imple
}
@Override
public void afterIndexShardPostRecovery(IndexShard indexShard) {
public void beforeIndexShardPostRecovery(IndexShard indexShard) {
if (hasPercolatorType(indexShard)) {
// percolator index has started, fetch what we can from it and initialize the indices
// we have
@ -276,8 +274,9 @@ public class PercolatorQueriesRegistry extends AbstractIndexShardComponent imple
private int loadQueries(IndexShard shard) {
shard.refresh("percolator_load_queries");
// Maybe add a mode load? This isn't really a write. We need write b/c state=post_recovery
try (Engine.Searcher searcher = shard.acquireSearcher("percolator_load_queries", true)) {
// NOTE: we acquire the searcher via the engine directly here since this is executed right
// before the shard is marked as POST_RECOVERY
try (Engine.Searcher searcher = shard.engine().acquireSearcher("percolator_load_queries")) {
Query query = new TermQuery(new Term(TypeFieldMapper.NAME, PercolatorService.TYPE_NAME));
QueriesLoaderCollector queryCollector = new QueriesLoaderCollector(PercolatorQueriesRegistry.this, logger, mapperService, indexFieldDataService);
searcher.searcher().search(query, queryCollector);

View File

@ -28,6 +28,6 @@ public interface BoostableQueryBuilder<B extends BoostableQueryBuilder<B>> {
* Sets the boost for this query. Documents matching this query will (in addition to the normal
* weightings) have their score multiplied by the boost provided.
*/
public B boost(float boost);
B boost(float boost);
}

View File

@ -27,12 +27,14 @@ import org.elasticsearch.common.xcontent.XContentBuilder;
import java.io.IOException;
/**
* {@link AbstractQueryBuilder} that builds a GeoShape Filter
* {@link QueryBuilder} that builds a GeoShape Filter
*/
public class GeoShapeQueryBuilder extends AbstractQueryBuilder<GeoShapeQueryBuilder> {
public class GeoShapeQueryBuilder extends AbstractQueryBuilder<GeoShapeQueryBuilder> implements BoostableQueryBuilder<GeoShapeQueryBuilder> {
public static final String NAME = "geo_shape";
static final GeoShapeQueryBuilder PROTOTYPE = new GeoShapeQueryBuilder(null, null);
private final String name;
private final ShapeBuilder shape;
@ -49,8 +51,8 @@ public class GeoShapeQueryBuilder extends AbstractQueryBuilder<GeoShapeQueryBuil
private ShapeRelation relation = null;
static final GeoShapeQueryBuilder PROTOTYPE = new GeoShapeQueryBuilder(null, null);
private float boost = -1;
/**
* Creates a new GeoShapeQueryBuilder whose Filter will be against the
* given field name using the given Shape
@ -150,6 +152,12 @@ public class GeoShapeQueryBuilder extends AbstractQueryBuilder<GeoShapeQueryBuil
return this;
}
@Override
public GeoShapeQueryBuilder boost(float boost) {
this.boost = boost;
return this;
}
@Override
protected void doXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject(NAME);
@ -181,6 +189,10 @@ public class GeoShapeQueryBuilder extends AbstractQueryBuilder<GeoShapeQueryBuil
builder.endObject();
if (boost != -1) {
builder.field("boost", boost);
}
if (queryName != null) {
builder.field("_name", queryName);
}

View File

@ -22,7 +22,7 @@ package org.elasticsearch.index.query;
/**
* A filter for a field based on several terms matching on any of them.
* @deprecated use {@link TermsQueryBuilder#TermsQueryBuilder(name, lookupIndex, lookupType, lookupId)} instead.
* @deprecated use {@link TermsQueryBuilder} instead.
*/
@Deprecated
public class TermsLookupQueryBuilder extends TermsQueryBuilder {

View File

@ -26,10 +26,12 @@ import java.io.IOException;
/**
* A filter for a field based on several terms matching on any of them.
*/
public class TermsQueryBuilder extends AbstractQueryBuilder<TermsQueryBuilder> {
public class TermsQueryBuilder extends AbstractQueryBuilder<TermsQueryBuilder> implements BoostableQueryBuilder<TermsQueryBuilder> {
public static final String NAME = "terms";
static final TermsQueryBuilder PROTOTYPE = new TermsQueryBuilder(null, (Object) null);
private final String name;
private final Object values;
@ -45,7 +47,7 @@ public class TermsQueryBuilder extends AbstractQueryBuilder<TermsQueryBuilder> {
private String lookupPath;
private Boolean lookupCache;
static final TermsQueryBuilder PROTOTYPE = new TermsQueryBuilder(null, (Object) null);
private float boost = -1;
/**
* A filter for a field based on several terms matching on any of them.
@ -184,6 +186,12 @@ public class TermsQueryBuilder extends AbstractQueryBuilder<TermsQueryBuilder> {
return this;
}
@Override
public TermsQueryBuilder boost(float boost) {
this.boost = boost;
return this;
}
@Override
public void doXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject(NAME);
@ -208,6 +216,11 @@ public class TermsQueryBuilder extends AbstractQueryBuilder<TermsQueryBuilder> {
if (execution != null) {
builder.field("execution", execution);
}
if (boost != -1) {
builder.field("boost", boost);
}
if (queryName != null) {
builder.field("_name", queryName);
}

View File

@ -742,11 +742,7 @@ public class IndexShard extends AbstractIndexShardComponent {
}
public Engine.Searcher acquireSearcher(String source) {
return acquireSearcher(source, false);
}
public Engine.Searcher acquireSearcher(String source, boolean searcherForWriteOperation) {
readAllowed(searcherForWriteOperation);
readAllowed();
return engine().acquireSearcher(source);
}
@ -776,6 +772,7 @@ public class IndexShard extends AbstractIndexShardComponent {
}
public IndexShard postRecovery(String reason) throws IndexShardStartedException, IndexShardRelocatedException, IndexShardClosedException {
indicesLifecycle.beforeIndexShardPostRecovery(this);
synchronized (mutex) {
if (state == IndexShardState.CLOSED) {
throw new IndexShardClosedException(shardId);
@ -907,20 +904,9 @@ public class IndexShard extends AbstractIndexShardComponent {
}
public void readAllowed() throws IllegalIndexShardStateException {
readAllowed(false);
}
private void readAllowed(boolean writeOperation) throws IllegalIndexShardStateException {
IndexShardState state = this.state; // one time volatile read
if (writeOperation) {
if (state != IndexShardState.STARTED && state != IndexShardState.RELOCATED && state != IndexShardState.RECOVERING && state != IndexShardState.POST_RECOVERY) {
throw new IllegalIndexShardStateException(shardId, state, "operations only allowed when started/relocated");
}
} else {
if (state != IndexShardState.STARTED && state != IndexShardState.RELOCATED) {
throw new IllegalIndexShardStateException(shardId, state, "operations only allowed when started/relocated");
}
if (state != IndexShardState.STARTED && state != IndexShardState.RELOCATED) {
throw new IllegalIndexShardStateException(shardId, state, "operations only allowed when started/relocated");
}
}

View File

@ -97,9 +97,16 @@ public interface IndicesLifecycle {
}
public void afterIndexShardPostRecovery(IndexShard indexShard) {
}
/**
* Called right after the shard is moved into POST_RECOVERY mode
*/
public void afterIndexShardPostRecovery(IndexShard indexShard) {}
/**
* Called right before the shard is moved into POST_RECOVERY mode.
* The shard is ready to be used but not yet marked as POST_RECOVERY.
*/
public void beforeIndexShardPostRecovery(IndexShard indexShard) {}
/**
* Called after the index shard has been started.

View File

@ -121,6 +121,18 @@ public class InternalIndicesLifecycle extends AbstractComponent implements Indic
}
}
public void beforeIndexShardPostRecovery(IndexShard indexShard) {
for (Listener listener : listeners) {
try {
listener.beforeIndexShardPostRecovery(indexShard);
} catch (Throwable t) {
logger.warn("{} failed to invoke before shard post recovery callback", t, indexShard.shardId());
throw t;
}
}
}
public void afterIndexShardPostRecovery(IndexShard indexShard) {
for (Listener listener : listeners) {
try {

View File

@ -39,6 +39,8 @@ public class OsInfo implements Streamable, Serializable, ToXContent {
int availableProcessors;
String name = null;
Cpu cpu = null;
Mem mem = null;
@ -88,8 +90,13 @@ public class OsInfo implements Streamable, Serializable, ToXContent {
return swap();
}
public String getName() {
return name;
}
static final class Fields {
static final XContentBuilderString OS = new XContentBuilderString("os");
static final XContentBuilderString NAME = new XContentBuilderString("name");
static final XContentBuilderString REFRESH_INTERVAL = new XContentBuilderString("refresh_interval");
static final XContentBuilderString REFRESH_INTERVAL_IN_MILLIS = new XContentBuilderString("refresh_interval_in_millis");
static final XContentBuilderString AVAILABLE_PROCESSORS = new XContentBuilderString("available_processors");
@ -112,6 +119,9 @@ public class OsInfo implements Streamable, Serializable, ToXContent {
@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject(Fields.OS);
if (name != null) {
builder.field(Fields.NAME, name);
}
builder.timeValueField(Fields.REFRESH_INTERVAL_IN_MILLIS, Fields.REFRESH_INTERVAL, refreshInterval);
builder.field(Fields.AVAILABLE_PROCESSORS, availableProcessors);
if (cpu != null) {

View File

@ -19,6 +19,7 @@
package org.elasticsearch.monitor.os;
import org.apache.lucene.util.Constants;
import org.elasticsearch.common.component.AbstractComponent;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.settings.Settings;
@ -46,6 +47,7 @@ public class OsService extends AbstractComponent {
this.info = probe.osInfo();
this.info.refreshInterval = refreshInterval.millis();
this.info.availableProcessors = Runtime.getRuntime().availableProcessors();
this.info.name = Constants.OS_NAME;
osStatsCache = new OsStatsCache(refreshInterval, probe.osStats());
logger.debug("Using probe [{}] with refresh_interval [{}]", probe, refreshInterval);
}

View File

@ -80,6 +80,8 @@ public class RestHealthAction extends AbstractCatAction {
t.addCell("init", "alias:i,shards.initializing,shardsInitializing;text-align:right;desc:number of initializing nodes");
t.addCell("unassign", "alias:u,shards.unassigned,shardsUnassigned;text-align:right;desc:number of unassigned shards");
t.addCell("pending_tasks", "alias:pt,pendingTasks;text-align:right;desc:number of pending tasks");
t.addCell("max_task_wait_time", "alias:mtwt,maxTaskWaitTime;text-align:right;desc:wait time of longest task pending");
t.addCell("active_shards_percent", "alias:asp,activeShardsPercent;text-align:right;desc:active number of shards in percent");
t.endHeaders();
return t;
@ -103,6 +105,8 @@ public class RestHealthAction extends AbstractCatAction {
t.addCell(health.getInitializingShards());
t.addCell(health.getUnassignedShards());
t.addCell(health.getNumberOfPendingTasks());
t.addCell(health.getTaskMaxWaitingTime().millis() == 0 ? "-" : health.getTaskMaxWaitingTime());
t.addCell(String.format(Locale.ROOT, "%1.1f%%", health.getActiveShardsPercent()));
t.endRow();
return t;
}

View File

@ -120,7 +120,6 @@ public class MovAvgPipelineAggregator extends PipelineAggregator {
InternalHistogram.Bucket newBucket = bucket;
if (!(thisBucketValue == null || thisBucketValue.equals(Double.NaN))) {
values.offer(thisBucketValue);
// Some models (e.g. HoltWinters) have certain preconditions that must be met
if (model.hasValue(values.size())) {
@ -142,6 +141,8 @@ public class MovAvgPipelineAggregator extends PipelineAggregator {
}
lastValidPosition = counter;
}
values.offer(thisBucketValue);
}
counter += 1;
newBuckets.add(newBucket);

View File

@ -29,6 +29,7 @@ import org.elasticsearch.search.internal.SearchContext;
import java.io.IOException;
import java.text.ParseException;
import java.util.Arrays;
import java.util.Collection;
import java.util.Map;
@ -50,6 +51,15 @@ public class EwmaModel extends MovAvgModel {
this.alpha = alpha;
}
@Override
protected <T extends Number> double[] doPredict(Collection<T> values, int numPredictions) {
double[] predictions = new double[numPredictions];
// EWMA just emits the same final prediction repeatedly.
Arrays.fill(predictions, next(values));
return predictions;
}
@Override
public <T extends Number> double next(Collection<T> values) {

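For reference, next() implements the standard exponentially weighted moving average recurrence (seeding s_0 with the first window value is an assumption about this implementation):

s_t = alpha * x_t + (1 - alpha) * s_{t-1}

which is why doPredict() can simply repeat the final s_t for every requested prediction.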
View File

@ -67,7 +67,7 @@ public class HoltLinearModel extends MovAvgModel {
* @return Returns an array of doubles, since most smoothing methods operate on floating points
*/
@Override
public <T extends Number> double[] predict(Collection<T> values, int numPredictions) {
protected <T extends Number> double[] doPredict(Collection<T> values, int numPredictions) {
return next(values, numPredictions);
}

View File

@ -176,7 +176,7 @@ public class HoltWintersModel extends MovAvgModel {
* @return Returns an array of doubles, since most smoothing methods operate on floating points
*/
@Override
public <T extends Number> double[] predict(Collection<T> values, int numPredictions) {
protected <T extends Number> double[] doPredict(Collection<T> values, int numPredictions) {
return next(values, numPredictions);
}

View File

@ -30,6 +30,7 @@ import org.elasticsearch.search.internal.SearchContext;
import java.io.IOException;
import java.text.ParseException;
import java.util.Arrays;
import java.util.Collection;
import java.util.Map;
@ -41,6 +42,16 @@ public class LinearModel extends MovAvgModel {
protected static final ParseField NAME_FIELD = new ParseField("linear");
@Override
protected <T extends Number> double[] doPredict(Collection<T> values, int numPredictions) {
double[] predictions = new double[numPredictions];
// The linear model just emits the same final prediction repeatedly.
Arrays.fill(predictions, next(values));
return predictions;
}
@Override
public <T extends Number> double next(Collection<T> values) {
double avg = 0;

View File

@ -19,8 +19,6 @@
package org.elasticsearch.search.aggregations.pipeline.movavg.models;
import com.google.common.collect.EvictingQueue;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.search.SearchParseException;
@ -44,7 +42,7 @@ public abstract class MovAvgModel {
*/
public boolean hasValue(int windowLength) {
// Default implementation can provide a next() value for any non-empty window
return true;
return windowLength > 0;
}
/**
@ -57,9 +55,7 @@ public abstract class MovAvgModel {
public abstract <T extends Number> double next(Collection<T> values);
/**
* Predicts the next `n` values in the series, using the smoothing model to generate new values.
* Default prediction mode is to simply continuing calling <code>next()</code> and adding the
* predicted value back into the windowed buffer.
* Predicts the next `n` values in the series.
*
* @param values Collection of numerics to movingAvg, usually windowed
* @param numPredictions Number of newly generated predictions to return
@ -67,34 +63,31 @@ public abstract class MovAvgModel {
* @return Returns an array of doubles, since most smoothing methods operate on floating points
*/
public <T extends Number> double[] predict(Collection<T> values, int numPredictions) {
double[] predictions = new double[numPredictions];
assert(numPredictions >= 1);
// If there are no values, we can't do anything. Return an array of NaNs.
if (values.size() == 0) {
if (values.isEmpty()) {
return emptyPredictions(numPredictions);
}
// special case for one prediction, avoids allocation
if (numPredictions < 1) {
throw new IllegalArgumentException("numPredictions may not be less than 1.");
} else if (numPredictions == 1){
predictions[0] = next(values);
return predictions;
}
Collection<Number> predictionBuffer = EvictingQueue.create(values.size());
predictionBuffer.addAll(values);
for (int i = 0; i < numPredictions; i++) {
predictions[i] = next(predictionBuffer);
// Add the last value to the buffer, so we can keep predicting
predictionBuffer.add(predictions[i]);
}
return predictions;
return doPredict(values, numPredictions);
}
/**
* Calls to the model-specific implementation which actually generates the predictions
*
* @param values Collection of numerics to movingAvg, usually windowed
* @param numPredictions Number of newly generated predictions to return
* @param <T> Type of numeric
* @return Returns an array of doubles, since most smoothing methods operate on floating points
*/
protected abstract <T extends Number> double[] doPredict(Collection<T> values, int numPredictions);
/**
* Returns an empty set of predictions, filled with NaNs
* @param numPredictions Number of predictions to generate
* @return An array of length numPredictions filled with Double.NaN
*/
protected double[] emptyPredictions(int numPredictions) {
double[] predictions = new double[numPredictions];
Arrays.fill(predictions, Double.NaN);

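The refactoring splits the shared guard logic in predict() from the model-specific doPredict(). A hedged usage sketch (SimpleModel's no-arg constructor and the window contents are assumptions):

MovAvgModel model = new SimpleModel();
List<Double> window = Arrays.asList(10.0, 20.0, 30.0);
double[] forecast = model.predict(window, 3); // guard checks, then doPredict()
// the simple model repeats the window mean: [20.0, 20.0, 20.0]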
View File

@ -29,6 +29,7 @@ import org.elasticsearch.search.internal.SearchContext;
import java.io.IOException;
import java.text.ParseException;
import java.util.Arrays;
import java.util.Collection;
import java.util.Map;
@ -39,6 +40,16 @@ public class SimpleModel extends MovAvgModel {
protected static final ParseField NAME_FIELD = new ParseField("simple");
@Override
protected <T extends Number> double[] doPredict(Collection<T> values, int numPredictions) {
double[] predictions = new double[numPredictions];
// The simple model just emits the same final prediction repeatedly.
Arrays.fill(predictions, next(values));
return predictions;
}
@Override
public <T extends Number> double next(Collection<T> values) {
double avg = 0;

View File

@ -31,6 +31,7 @@ import org.elasticsearch.cluster.metadata.MetaData;
import org.elasticsearch.cluster.routing.*;
import org.elasticsearch.common.io.stream.BytesStreamOutput;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.index.shard.ShardId;
import org.elasticsearch.test.ElasticsearchTestCase;
import org.hamcrest.Matchers;
@ -38,8 +39,9 @@ import org.junit.Test;
import java.io.IOException;
import static org.hamcrest.CoreMatchers.allOf;
import static org.hamcrest.CoreMatchers.equalTo;
import static org.hamcrest.Matchers.empty;
import static org.hamcrest.Matchers.*;
public class ClusterHealthResponsesTests extends ElasticsearchTestCase {
@ -193,13 +195,16 @@ public class ClusterHealthResponsesTests extends ElasticsearchTestCase {
int pendingTasks = randomIntBetween(0, 200);
int inFlight = randomIntBetween(0, 200);
int delayedUnassigned = randomIntBetween(0, 200);
ClusterHealthResponse clusterHealth = new ClusterHealthResponse("bla", clusterState.metaData().concreteIndices(IndicesOptions.strictExpand(), (String[]) null), clusterState, pendingTasks, inFlight, delayedUnassigned);
TimeValue pendingTaskInQueueTime = TimeValue.timeValueMillis(randomIntBetween(1000, 100000));
ClusterHealthResponse clusterHealth = new ClusterHealthResponse("bla", clusterState.metaData().concreteIndices(IndicesOptions.strictExpand(), (String[]) null), clusterState, pendingTasks, inFlight, delayedUnassigned, pendingTaskInQueueTime);
logger.info("cluster status: {}, expected {}", clusterHealth.getStatus(), counter.status());
clusterHealth = maybeSerialize(clusterHealth);
assertClusterHealth(clusterHealth, counter);
assertThat(clusterHealth.getNumberOfPendingTasks(), Matchers.equalTo(pendingTasks));
assertThat(clusterHealth.getNumberOfInFlightFetch(), Matchers.equalTo(inFlight));
assertThat(clusterHealth.getDelayedUnassignedShards(), Matchers.equalTo(delayedUnassigned));
assertThat(clusterHealth.getTaskMaxWaitingTime().millis(), is(pendingTaskInQueueTime.millis()));
assertThat(clusterHealth.getActiveShardsPercent(), is(allOf(greaterThanOrEqualTo(0.0), lessThanOrEqualTo(100.0))));
}
ClusterHealthResponse maybeSerialize(ClusterHealthResponse clusterHealth) throws IOException {
@ -227,7 +232,7 @@ public class ClusterHealthResponsesTests extends ElasticsearchTestCase {
metaData.put(indexMetaData, true);
routingTable.add(indexRoutingTable);
ClusterState clusterState = ClusterState.builder(ClusterName.DEFAULT).metaData(metaData).routingTable(routingTable).build();
ClusterHealthResponse clusterHealth = new ClusterHealthResponse("bla", clusterState.metaData().concreteIndices(IndicesOptions.strictExpand(), (String[]) null), clusterState, 0, 0, 0);
ClusterHealthResponse clusterHealth = new ClusterHealthResponse("bla", clusterState.metaData().concreteIndices(IndicesOptions.strictExpand(), (String[]) null), clusterState, 0, 0, 0, TimeValue.timeValueMillis(0));
clusterHealth = maybeSerialize(clusterHealth);
// currently we have no cluster level validation failures as index validation issues are reported per index.
assertThat(clusterHealth.getValidationFailures(), Matchers.hasSize(0));

View File

@ -102,65 +102,4 @@ public class FailedNodeRoutingTests extends ElasticsearchAllocationTestCase {
assertThat(routingNode.numberOfShardsWithState(INITIALIZING), equalTo(1));
}
}
@Test
public void simpleFailedNodeTestNoReassign() {
AllocationService strategy = createAllocationService(settingsBuilder().put(ClusterRebalanceAllocationDecider.CLUSTER_ROUTING_ALLOCATION_ALLOW_REBALANCE,
ClusterRebalanceAllocationDecider.ClusterRebalanceType.ALWAYS.toString()).build());
MetaData metaData = MetaData.builder()
.put(IndexMetaData.builder("test1").settings(settings(Version.CURRENT)).numberOfShards(1).numberOfReplicas(1))
.put(IndexMetaData.builder("test2").settings(settings(Version.CURRENT)).numberOfShards(1).numberOfReplicas(1))
.build();
RoutingTable routingTable = RoutingTable.builder()
.addAsNew(metaData.index("test1"))
.addAsNew(metaData.index("test2"))
.build();
ClusterState clusterState = ClusterState.builder(org.elasticsearch.cluster.ClusterName.DEFAULT).metaData(metaData).routingTable(routingTable).build();
logger.info("start 4 nodes");
clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder().put(newNode("node1")).put(newNode("node2")).put(newNode("node3")).put(newNode("node4"))).build();
RoutingTable prevRoutingTable = routingTable;
routingTable = strategy.reroute(clusterState).routingTable();
clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();
logger.info("start all the primary shards, replicas will start initializing");
RoutingNodes routingNodes = clusterState.routingNodes();
prevRoutingTable = routingTable;
routingTable = strategy.applyStartedShards(clusterState, routingNodes.shardsWithState(INITIALIZING)).routingTable();
clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();
routingNodes = clusterState.routingNodes();
logger.info("start the replica shards");
routingNodes = clusterState.routingNodes();
prevRoutingTable = routingTable;
routingTable = strategy.applyStartedShards(clusterState, routingNodes.shardsWithState(INITIALIZING)).routingTable();
clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();
routingNodes = clusterState.routingNodes();
assertThat(routingNodes.node("node1").numberOfShardsWithState(STARTED), equalTo(1));
assertThat(routingNodes.node("node2").numberOfShardsWithState(STARTED), equalTo(1));
assertThat(routingNodes.node("node3").numberOfShardsWithState(STARTED), equalTo(1));
assertThat(routingNodes.node("node4").numberOfShardsWithState(STARTED), equalTo(1));
logger.info("remove 2 nodes where primaries are allocated, reroute");
clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder(clusterState.nodes())
.remove(routingTable.index("test1").shard(0).primaryShard().currentNodeId())
.remove(routingTable.index("test2").shard(0).primaryShard().currentNodeId())
)
.build();
prevRoutingTable = routingTable;
routingTable = strategy.rerouteWithNoReassign(clusterState).routingTable();
clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();
routingNodes = clusterState.routingNodes();
for (RoutingNode routingNode : routingNodes) {
assertThat(routingNode.numberOfShardsWithState(STARTED), equalTo(1));
}
assertThat(routingNodes.unassigned().size(), equalTo(2));
}
}

View File

@ -25,11 +25,13 @@ import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.test.ElasticsearchTestCase;
import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import org.joda.time.LocalDateTime;
import org.joda.time.MutableDateTime;
import org.joda.time.format.*;
import org.junit.Test;
import java.util.Date;
import java.util.Locale;
import static org.hamcrest.Matchers.*;
@ -250,7 +252,7 @@ public class SimpleJodaTests extends ElasticsearchTestCase {
}
@Test
public void testThatEpochsInSecondsCanBeParsed() {
public void testThatEpochsCanBeParsed() {
boolean parseMilliSeconds = randomBoolean();
// epoch: 1433144433655 => date: Mon Jun 1 09:40:33.655 CEST 2015
@ -271,6 +273,37 @@ public class SimpleJodaTests extends ElasticsearchTestCase {
}
}
@Test
public void testThatNegativeEpochsCanBeParsed() {
// problem: negative epochs can be arbitrarily large...
boolean parseMilliSeconds = randomBoolean();
FormatDateTimeFormatter formatter = Joda.forPattern(parseMilliSeconds ? "epoch_millis" : "epoch_second");
DateTime dateTime = formatter.parser().parseDateTime("-10000");
assertThat(dateTime.getYear(), is(1969));
assertThat(dateTime.getMonthOfYear(), is(12));
assertThat(dateTime.getDayOfMonth(), is(31));
if (parseMilliSeconds) {
assertThat(dateTime.getHourOfDay(), is(23)); // utc timezone, +2 offset due to CEST
assertThat(dateTime.getMinuteOfHour(), is(59));
assertThat(dateTime.getSecondOfMinute(), is(50));
} else {
assertThat(dateTime.getHourOfDay(), is(21)); // utc timezone, +2 offset due to CEST
assertThat(dateTime.getMinuteOfHour(), is(13));
assertThat(dateTime.getSecondOfMinute(), is(20));
}
// every negative epoch must be parsed, no matter if exact the size or bigger
if (parseMilliSeconds) {
formatter.parser().parseDateTime("-100000000");
formatter.parser().parseDateTime("-999999999999");
formatter.parser().parseDateTime("-1234567890123");
} else {
formatter.parser().parseDateTime("-100000000");
formatter.parser().parseDateTime("-1234567890");
}
}
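For orientation: negative epoch values simply address instants before 1970-01-01T00:00:00Z, which is why the assertions above land in 1969. A minimal standalone Joda-Time sketch (class name illustrative):

import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;

public class NegativeEpochDemo {
    public static void main(String[] args) {
        // -10000 read as millis: 10 seconds before the epoch
        System.out.println(new DateTime(-10000L, DateTimeZone.UTC));         // 1969-12-31T23:59:50.000Z
        // -10000 read as seconds: 2h46m40s before the epoch
        System.out.println(new DateTime(-10000L * 1000, DateTimeZone.UTC));  // 1969-12-31T21:13:20.000Z
    }
}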
@Test(expected = IllegalArgumentException.class)
public void testForInvalidDatesInEpochSecond() {
FormatDateTimeFormatter formatter = Joda.forPattern("epoch_second");
@ -283,6 +316,51 @@ public class SimpleJodaTests extends ElasticsearchTestCase {
formatter.parser().parseDateTime(randomFrom("invalid date", "12345678901234"));
}
public void testThatEpochParserIsPrinter() {
FormatDateTimeFormatter formatter = Joda.forPattern("epoch_millis");
assertThat(formatter.parser().isPrinter(), is(true));
assertThat(formatter.printer().isPrinter(), is(true));
FormatDateTimeFormatter epochSecondFormatter = Joda.forPattern("epoch_second");
assertThat(epochSecondFormatter.parser().isPrinter(), is(true));
assertThat(epochSecondFormatter.printer().isPrinter(), is(true));
}
public void testThatEpochTimePrinterWorks() {
StringBuffer buffer = new StringBuffer();
LocalDateTime now = LocalDateTime.now();
Joda.EpochTimePrinter epochTimePrinter = new Joda.EpochTimePrinter(false);
epochTimePrinter.printTo(buffer, now, Locale.ROOT);
assertThat(buffer.length(), is(10));
// only check the last digit, as seconds in the unix timestamp run 0-99 and don't stop at 60
assertThat(buffer.toString(), endsWith(String.valueOf(now.getSecondOfMinute() % 10)));
buffer = new StringBuffer();
Joda.EpochTimePrinter epochMilliSecondTimePrinter = new Joda.EpochTimePrinter(true);
epochMilliSecondTimePrinter.printTo(buffer, now, Locale.ROOT);
assertThat(buffer.length(), is(13));
assertThat(buffer.toString(), endsWith(String.valueOf(now.getMillisOfSecond())));
}
public void testThatEpochParserIsIdempotent() {
FormatDateTimeFormatter formatter = Joda.forPattern("epoch_millis");
DateTime dateTime = formatter.parser().parseDateTime("1234567890123");
assertThat(dateTime.getMillis(), is(1234567890123L));
dateTime = formatter.printer().parseDateTime("1234567890456");
assertThat(dateTime.getMillis(), is(1234567890456L));
dateTime = formatter.parser().parseDateTime("1234567890789");
assertThat(dateTime.getMillis(), is(1234567890789L));
FormatDateTimeFormatter secondsFormatter = Joda.forPattern("epoch_second");
DateTime secondsDateTime = secondsFormatter.parser().parseDateTime("1234567890");
assertThat(secondsDateTime.getMillis(), is(1234567890000L));
secondsDateTime = secondsFormatter.printer().parseDateTime("1234567890");
assertThat(secondsDateTime.getMillis(), is(1234567890000L));
secondsDateTime = secondsFormatter.parser().parseDateTime("1234567890");
assertThat(secondsDateTime.getMillis(), is(1234567890000L));
}
private long utcTimeInMillis(String time) {
return ISODateTimeFormat.dateOptionalTimeParser().withZone(DateTimeZone.UTC).parseMillis(time);
}

View File

@ -570,7 +570,8 @@ public class BulkTests extends ElasticsearchIntegrationTest {
.endObject()
.endObject()
.endObject();
assertAcked(prepareCreate("test").addMapping("type", builder));
assertAcked(prepareCreate("test").addMapping("type", builder)
.setSettings(IndexMetaData.SETTING_VERSION_CREATED, Version.V_1_4_2_ID));
String brokenBuildRequestData = "{\"index\": {\"_id\": \"1\"}}\n" +
"{\"name\": \"Malformed}\n" +

View File

@ -1051,28 +1051,32 @@ public class GetActionTests extends ElasticsearchIntegrationTest {
client().prepareIndex("test", "doc").setId("1").setSource(doc).setRouting("1").get();
}
@Test
public void testUngeneratedFieldsNotPartOfSourceUnstored() throws IOException {
indexSingleDocumentWithUngeneratedFieldsThatAreNeverPartOf_source(false, randomBoolean());
String[] fieldsList = {"_timestamp"};
String[] alwaysStoredFieldsList = {"_routing", "_size"};
// before refresh - document is only in translog
assertGetFieldsAlwaysNull(indexOrAlias(), "doc", "1", fieldsList, "1");
assertGetFieldsAlwaysWorks(indexOrAlias(), "doc", "1", alwaysStoredFieldsList, "1");
refresh();
// after refresh - document is in the translog and also indexed
assertGetFieldsAlwaysNull(indexOrAlias(), "doc", "1", fieldsList, "1");
assertGetFieldsAlwaysWorks(indexOrAlias(), "doc", "1", alwaysStoredFieldsList, "1");
flush();
// after flush - document is not in the translog anymore - only indexed
assertGetFieldsAlwaysNull(indexOrAlias(), "doc", "1", fieldsList, "1");
assertGetFieldsAlwaysWorks(indexOrAlias(), "doc", "1", alwaysStoredFieldsList, "1");
}
@Test
public void testUngeneratedFieldsNotPartOfSourceStored() throws IOException {
indexSingleDocumentWithUngeneratedFieldsThatAreNeverPartOf_source(true, randomBoolean());
String createIndexSource = "{\n" +
" \"settings\": {\n" +
" \"index.translog.disable_flush\": true,\n" +
" \"refresh_interval\": \"-1\"\n" +
" },\n" +
" \"mappings\": {\n" +
" \"parentdoc\": {},\n" +
" \"doc\": {\n" +
" \"_timestamp\": {\n" +
" \"enabled\": true\n" +
" },\n" +
" \"_size\": {\n" +
" \"enabled\": true\n" +
" }\n" +
" }\n" +
" }\n" +
"}";
assertAcked(prepareCreate("test").addAlias(new Alias("alias")).setSource(createIndexSource));
ensureGreen();
String doc = "{\n" +
" \"text\": \"some text.\"\n" +
"}\n";
client().prepareIndex("test", "doc").setId("1").setSource(doc).setRouting("1").get();
String[] fieldsList = {"_timestamp", "_size", "_routing"};
// before refresh - document is only in translog
assertGetFieldsAlwaysWorks(indexOrAlias(), "doc", "1", fieldsList, "1");
@ -1084,36 +1088,6 @@ public class GetActionTests extends ElasticsearchIntegrationTest {
assertGetFieldsAlwaysWorks(indexOrAlias(), "doc", "1", fieldsList, "1");
}
void indexSingleDocumentWithUngeneratedFieldsThatAreNeverPartOf_source(boolean stored, boolean sourceEnabled) {
String storedString = stored ? "yes" : "no";
String createIndexSource = "{\n" +
" \"settings\": {\n" +
" \"index.translog.disable_flush\": true,\n" +
" \"refresh_interval\": \"-1\"\n" +
" },\n" +
" \"mappings\": {\n" +
" \"parentdoc\": {},\n" +
" \"doc\": {\n" +
" \"_timestamp\": {\n" +
" \"store\": \"" + storedString + "\",\n" +
" \"enabled\": true\n" +
" },\n" +
" \"_size\": {\n" +
" \"enabled\": true\n" +
" }\n" +
" }\n" +
" }\n" +
"}";
assertAcked(prepareCreate("test").addAlias(new Alias("alias")).setSource(createIndexSource));
ensureGreen();
String doc = "{\n" +
" \"text\": \"some text.\"\n" +
"}\n";
client().prepareIndex("test", "doc").setId("1").setSource(doc).setRouting("1").get();
}
@Test
public void testGeneratedStringFieldsUnstored() throws IOException {
indexSingleDocumentWithStringFieldsGeneratedFromText(false, randomBoolean());

View File

@ -36,6 +36,7 @@ import org.elasticsearch.common.xcontent.ToXContent;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.json.JsonXContent;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.mapper.DocumentMapper;
import org.elasticsearch.index.mapper.DocumentMapperParser;
import org.elasticsearch.index.mapper.MappedFieldType;
@ -69,6 +70,7 @@ import static org.hamcrest.Matchers.notNullValue;
/**
*/
public class TimestampMappingTests extends ElasticsearchSingleNodeTest {
Settings BWC_SETTINGS = Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.V_1_4_2.id).build();
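The new BWC_SETTINGS field drives the testBackcompat* renames below: legacy _timestamp options such as store, path and index are only honored on indices created before 2.0, so those tests pin the index's created version. A sketch of the pattern (using the class's createIndex helper):

Settings bwc = Settings.builder()
        .put(IndexMetaData.SETTING_VERSION_CREATED, Version.V_1_4_2.id)
        .build();
// a pre-2.0 index still accepts _timestamp options that 2.0+ indices reject
DocumentMapperParser parser = createIndex("test", bwc).mapperService().documentMapperParser();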
@Test
public void testSimpleDisabled() throws Exception {
@ -87,7 +89,7 @@ public class TimestampMappingTests extends ElasticsearchSingleNodeTest {
@Test
public void testEnabled() throws Exception {
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("_timestamp").field("enabled", "yes").field("store", "yes").endObject()
.startObject("_timestamp").field("enabled", "yes").endObject()
.endObject().endObject().string();
DocumentMapper docMapper = createIndex("test").mapperService().documentMapperParser().parse(mapping);
BytesReference source = XContentFactory.jsonBuilder()
@ -110,19 +112,18 @@ public class TimestampMappingTests extends ElasticsearchSingleNodeTest {
XContentFactory.jsonBuilder().startObject().startObject("type").startObject("_timestamp").endObject().endObject().string())) {
DocumentMapper docMapper = createIndex("test", Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, version).build()).mapperService().documentMapperParser().parse(mapping);
assertThat(docMapper.timestampFieldMapper().enabled(), equalTo(TimestampFieldMapper.Defaults.ENABLED.enabled));
assertThat(docMapper.timestampFieldMapper().fieldType().stored(), equalTo(version.onOrAfter(Version.V_2_0_0) ? true : false));
assertThat(docMapper.timestampFieldMapper().fieldType().stored(), equalTo(version.onOrAfter(Version.V_2_0_0)));
assertThat(docMapper.timestampFieldMapper().fieldType().indexOptions(), equalTo(TimestampFieldMapper.Defaults.FIELD_TYPE.indexOptions()));
assertThat(docMapper.timestampFieldMapper().path(), equalTo(TimestampFieldMapper.Defaults.PATH));
assertThat(docMapper.timestampFieldMapper().fieldType().dateTimeFormatter().format(), equalTo(TimestampFieldMapper.DEFAULT_DATE_TIME_FORMAT));
assertThat(docMapper.timestampFieldMapper().fieldType().hasDocValues(), equalTo(false));
assertThat(docMapper.timestampFieldMapper().fieldType().hasDocValues(), equalTo(version.onOrAfter(Version.V_2_0_0)));
assertAcked(client().admin().indices().prepareDelete("test").execute().get());
}
}
}
@Test
public void testSetValues() throws Exception {
public void testBackcompatSetValues() throws Exception {
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("_timestamp")
.field("enabled", "yes").field("store", "no").field("index", "no")
@ -130,7 +131,7 @@ public class TimestampMappingTests extends ElasticsearchSingleNodeTest {
.field("doc_values", true)
.endObject()
.endObject().endObject().string();
DocumentMapper docMapper = createIndex("test").mapperService().documentMapperParser().parse(mapping);
DocumentMapper docMapper = createIndex("test", BWC_SETTINGS).mapperService().documentMapperParser().parse(mapping);
assertThat(docMapper.timestampFieldMapper().enabled(), equalTo(true));
assertThat(docMapper.timestampFieldMapper().fieldType().stored(), equalTo(false));
assertEquals(IndexOptions.NONE, docMapper.timestampFieldMapper().fieldType().indexOptions());
@ -142,7 +143,7 @@ public class TimestampMappingTests extends ElasticsearchSingleNodeTest {
@Test
public void testThatDisablingDuringMergeIsWorking() throws Exception {
String enabledMapping = XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("_timestamp").field("enabled", true).field("store", "yes").endObject()
.startObject("_timestamp").field("enabled", true).endObject()
.endObject().endObject().string();
DocumentMapperParser parser = createIndex("test").mapperService().documentMapperParser();
DocumentMapper enabledMapper = parser.parse(enabledMapping);
@ -162,7 +163,7 @@ public class TimestampMappingTests extends ElasticsearchSingleNodeTest {
String enabledMapping = XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("_timestamp").field("enabled", true).field("store", "yes").field("index", "no").endObject()
.endObject().endObject().string();
DocumentMapper enabledMapper = createIndex("test").mapperService().documentMapperParser().parse(enabledMapping);
DocumentMapper enabledMapper = createIndex("test", BWC_SETTINGS).mapperService().documentMapperParser().parse(enabledMapping);
XContentBuilder builder = JsonXContent.contentBuilder().startObject();
enabledMapper.timestampFieldMapper().toXContent(builder, ToXContent.EMPTY_PARAMS).endObject();
@ -176,7 +177,7 @@ public class TimestampMappingTests extends ElasticsearchSingleNodeTest {
}
@Test // Issue 4718: was throwing a TimestampParsingException: failed to parse timestamp [null]
public void testPathMissingDefaultValue() throws Exception {
public void testBackcompatPathMissingDefaultValue() throws Exception {
XContentBuilder mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("_timestamp")
.field("enabled", "yes")
@ -190,7 +191,7 @@ public class TimestampMappingTests extends ElasticsearchSingleNodeTest {
.endObject();
MetaData metaData = MetaData.builder().build();
DocumentMapper docMapper = createIndex("test").mapperService().documentMapperParser().parse(mapping.string());
DocumentMapper docMapper = createIndex("test", BWC_SETTINGS).mapperService().documentMapperParser().parse(mapping.string());
MappingMetaData mappingMetaData = new MappingMetaData(docMapper);
@ -230,7 +231,7 @@ public class TimestampMappingTests extends ElasticsearchSingleNodeTest {
}
@Test // Issue 4718: was throwing a TimestampParsingException: failed to parse timestamp [null]
public void testPathMissingDefaultToEpochValue() throws Exception {
public void testBackcompatPathMissingDefaultToEpochValue() throws Exception {
XContentBuilder mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("_timestamp")
.field("enabled", "yes")
@ -245,7 +246,7 @@ public class TimestampMappingTests extends ElasticsearchSingleNodeTest {
.endObject();
MetaData metaData = MetaData.builder().build();
DocumentMapper docMapper = createIndex("test").mapperService().documentMapperParser().parse(mapping.string());
DocumentMapper docMapper = createIndex("test", BWC_SETTINGS).mapperService().documentMapperParser().parse(mapping.string());
MappingMetaData mappingMetaData = new MappingMetaData(docMapper);
@ -281,7 +282,7 @@ public class TimestampMappingTests extends ElasticsearchSingleNodeTest {
}
@Test // Issue 4718: was throwing a TimestampParsingException: failed to parse timestamp [null]
public void testPathMissingNowDefaultValue() throws Exception {
public void testBackcompatPathMissingNowDefaultValue() throws Exception {
XContentBuilder mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("_timestamp")
.field("enabled", "yes")
@ -296,7 +297,7 @@ public class TimestampMappingTests extends ElasticsearchSingleNodeTest {
.endObject();
MetaData metaData = MetaData.builder().build();
DocumentMapper docMapper = createIndex("test").mapperService().documentMapperParser().parse(mapping.string());
DocumentMapper docMapper = createIndex("test", BWC_SETTINGS).mapperService().documentMapperParser().parse(mapping.string());
MappingMetaData mappingMetaData = new MappingMetaData(docMapper);
@ -355,7 +356,7 @@ public class TimestampMappingTests extends ElasticsearchSingleNodeTest {
}
@Test // Issue 4718: was throwing a TimestampParsingException: failed to parse timestamp [null]
public void testPathMissingShouldFail() throws Exception {
public void testBackcompatPathMissingShouldFail() throws Exception {
XContentBuilder mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("_timestamp")
.field("enabled", "yes")
@ -369,7 +370,7 @@ public class TimestampMappingTests extends ElasticsearchSingleNodeTest {
.endObject();
MetaData metaData = MetaData.builder().build();
DocumentMapper docMapper = createIndex("test").mapperService().documentMapperParser().parse(mapping.string());
DocumentMapper docMapper = createIndex("test", BWC_SETTINGS).mapperService().documentMapperParser().parse(mapping.string());
MappingMetaData mappingMetaData = new MappingMetaData(docMapper);
@ -522,14 +523,10 @@ public class TimestampMappingTests extends ElasticsearchSingleNodeTest {
@Test
public void testParsingNotDefaultTwiceDoesNotChangeMapping() throws Exception {
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("_timestamp").field("enabled", true)
.field("index", randomBoolean() ? "no" : "analyzed") // default is "not_analyzed" which will be omitted when building the source again
.field("doc_values", true)
.field("path", "foo")
.field("default", "1970-01-01")
.startObject("fielddata").field("format", "doc_values").endObject()
.endObject()
.endObject().endObject().string();
.startObject("_timestamp")
.field("enabled", true)
.field("default", "1970-01-01")
.endObject().endObject().endObject().string();
DocumentMapperParser parser = createIndex("test").mapperService().documentMapperParser();
DocumentMapper docMapper = parser.parse(mapping);
@ -538,7 +535,7 @@ public class TimestampMappingTests extends ElasticsearchSingleNodeTest {
}
@Test
public void testParsingTwiceDoesNotChangeTokenizeValue() throws Exception {
public void testBackcompatParsingTwiceDoesNotChangeTokenizeValue() throws Exception {
String[] index_options = {"no", "analyzed", "not_analyzed"};
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("_timestamp").field("enabled", true)
@ -551,7 +548,7 @@ public class TimestampMappingTests extends ElasticsearchSingleNodeTest {
.startObject("properties")
.endObject()
.endObject().endObject().string();
DocumentMapperParser parser = createIndex("test").mapperService().documentMapperParser();
DocumentMapperParser parser = createIndex("test", BWC_SETTINGS).mapperService().documentMapperParser();
DocumentMapper docMapper = parser.parse(mapping);
boolean tokenized = docMapper.timestampFieldMapper().fieldType().tokenized();
@ -603,7 +600,7 @@ public class TimestampMappingTests extends ElasticsearchSingleNodeTest {
}
@Test
public void testMergingConflictsForIndexValues() throws Exception {
public void testBackcompatMergingConflictsForIndexValues() throws Exception {
List<String> indexValues = new ArrayList<>();
indexValues.add("analyzed");
indexValues.add("no");
@ -614,7 +611,7 @@ public class TimestampMappingTests extends ElasticsearchSingleNodeTest {
.field("index", indexValues.remove(randomInt(2)))
.endObject()
.endObject().endObject().string();
DocumentMapperParser parser = createIndex("test").mapperService().documentMapperParser();
DocumentMapperParser parser = createIndex("test", BWC_SETTINGS).mapperService().documentMapperParser();
DocumentMapper docMapper = parser.parse(mapping);
mapping = XContentFactory.jsonBuilder().startObject()
@ -656,9 +653,9 @@ public class TimestampMappingTests extends ElasticsearchSingleNodeTest {
}
@Test
public void testMergePaths() throws Exception {
public void testBackcompatMergePaths() throws Exception {
String[] possiblePathValues = {"some_path", "anotherPath", null};
DocumentMapperParser parser = createIndex("test").mapperService().documentMapperParser();
DocumentMapperParser parser = createIndex("test", BWC_SETTINGS).mapperService().documentMapperParser();
XContentBuilder mapping1 = XContentFactory.jsonBuilder().startObject()
.startObject("type")
.startObject("_timestamp");
@ -691,7 +688,7 @@ public class TimestampMappingTests extends ElasticsearchSingleNodeTest {
}
}
public void testDocValuesSerialization() throws Exception {
public void testBackcompatDocValuesSerialization() throws Exception {
// default
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("_timestamp")
@ -737,7 +734,7 @@ public class TimestampMappingTests extends ElasticsearchSingleNodeTest {
}
void assertDocValuesSerialization(String mapping) throws Exception {
DocumentMapperParser parser = createIndex("test_doc_values").mapperService().documentMapperParser();
DocumentMapperParser parser = createIndex("test_doc_values", BWC_SETTINGS).mapperService().documentMapperParser();
DocumentMapper docMapper = parser.parse(mapping);
boolean docValues = docMapper.timestampFieldMapper().fieldType().hasDocValues();
docMapper = parser.parse(docMapper.mappingSource().string());
@ -745,11 +742,11 @@ public class TimestampMappingTests extends ElasticsearchSingleNodeTest {
assertAcked(client().admin().indices().prepareDelete("test_doc_values"));
}
public void testPath() throws Exception {
public void testBackcompatPath() throws Exception {
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("_timestamp").field("enabled", true).field("path", "custom_timestamp").endObject()
.endObject().endObject().string();
DocumentMapper docMapper = createIndex("test").mapperService().documentMapperParser().parse(mapping);
DocumentMapper docMapper = createIndex("test", BWC_SETTINGS).mapperService().documentMapperParser().parse(mapping);
XContentBuilder doc = XContentFactory.jsonBuilder().startObject().field("custom_timestamp", 1).endObject();
MappingMetaData mappingMetaData = new MappingMetaData(docMapper);
@ -778,12 +775,12 @@ public class TimestampMappingTests extends ElasticsearchSingleNodeTest {
public void testThatEpochCanBeIgnoredWithCustomFormat() throws Exception {
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("_timestamp").field("enabled", true).field("format", "yyyyMMddHH").field("path", "custom_timestamp").endObject()
.startObject("_timestamp").field("enabled", true).field("format", "yyyyMMddHH").endObject()
.endObject().endObject().string();
DocumentMapper docMapper = createIndex("test").mapperService().documentMapperParser().parse(mapping);
XContentBuilder doc = XContentFactory.jsonBuilder().startObject().field("custom_timestamp", 2015060210).endObject();
IndexRequest request = new IndexRequest("test", "type", "1").source(doc);
XContentBuilder doc = XContentFactory.jsonBuilder().startObject().endObject();
IndexRequest request = new IndexRequest("test", "type", "1").source(doc).timestamp("2015060210");
MappingMetaData mappingMetaData = new MappingMetaData(docMapper);
request.process(MetaData.builder().build(), mappingMetaData, true, "test");

View File

@ -19,19 +19,16 @@
package org.elasticsearch.index.mapper.update;
import org.apache.lucene.util.LuceneTestCase;
import org.elasticsearch.action.admin.indices.mapping.get.GetMappingsResponse;
import org.elasticsearch.action.admin.indices.mapping.put.PutMappingResponse;
import org.elasticsearch.client.Client;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.index.mapper.MapperParsingException;
import org.elasticsearch.index.mapper.MergeMappingException;
import org.elasticsearch.test.ElasticsearchIntegrationTest;
import org.junit.Test;
import java.util.HashMap;
import java.util.LinkedHashMap;
import static org.elasticsearch.common.io.Streams.copyToStringFromClasspath;
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
@ -179,64 +176,4 @@ public class UpdateMappingOnClusterTests extends ElasticsearchIntegrationTest {
assertThat(previousMapping.getMappings().get(INDEX).get(TYPE).source(), equalTo(currentMapping.getMappings().get(INDEX).get(TYPE).source()));
}
}
@Test
public void testUpdateTimestamp() throws Exception {
boolean enabled = randomBoolean();
XContentBuilder mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("_timestamp").field("enabled", enabled).startObject("fielddata").field("loading", "lazy").field("format", "doc_values").endObject().field("store", "no").endObject()
.endObject().endObject();
client().admin().indices().prepareCreate("test").addMapping("type", mapping).get();
GetMappingsResponse appliedMappings = client().admin().indices().prepareGetMappings("test").get();
LinkedHashMap timestampMapping = (LinkedHashMap) appliedMappings.getMappings().get("test").get("type").getSourceAsMap().get("_timestamp");
assertThat((Boolean) timestampMapping.get("store"), equalTo(false));
assertThat((String)((LinkedHashMap) timestampMapping.get("fielddata")).get("loading"), equalTo("lazy"));
assertThat((String)((LinkedHashMap) timestampMapping.get("fielddata")).get("format"), equalTo("doc_values"));
mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("_timestamp").field("enabled", enabled).startObject("fielddata").field("loading", "eager").field("format", "array").endObject().field("store", "no").endObject()
.endObject().endObject();
PutMappingResponse putMappingResponse = client().admin().indices().preparePutMapping("test").setType("type").setSource(mapping).get();
appliedMappings = client().admin().indices().prepareGetMappings("test").get();
timestampMapping = (LinkedHashMap) appliedMappings.getMappings().get("test").get("type").getSourceAsMap().get("_timestamp");
assertThat((Boolean) timestampMapping.get("store"), equalTo(false));
assertThat((String)((LinkedHashMap) timestampMapping.get("fielddata")).get("loading"), equalTo("eager"));
assertThat((String)((LinkedHashMap) timestampMapping.get("fielddata")).get("format"), equalTo("array"));
}
@Test
@LuceneTestCase.AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/10297")
public void testTimestampMergingConflicts() throws Exception {
String mapping = XContentFactory.jsonBuilder().startObject().startObject(TYPE)
.startObject("_timestamp").field("enabled", true)
.startObject("fielddata").field("format", "doc_values").endObject()
.field("store", "yes")
.field("index", "analyzed")
.field("path", "foo")
.field("default", "1970-01-01")
.endObject()
.endObject().endObject().string();
client().admin().indices().prepareCreate(INDEX).addMapping(TYPE, mapping).get();
mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("_timestamp").field("enabled", false)
.startObject("fielddata").field("format", "array").endObject()
.field("store", "no")
.field("index", "no")
.field("path", "bar")
.field("default", "1970-01-02")
.endObject()
.endObject().endObject().string();
GetMappingsResponse mappingsBeforeUpdateResponse = client().admin().indices().prepareGetMappings(INDEX).addTypes(TYPE).get();
try {
client().admin().indices().preparePutMapping(INDEX).setType(TYPE).setSource(mapping).get();
fail("This should result in conflicts when merging the mapping");
} catch (MergeMappingException e) {
String[] expectedConflicts = {"mapper [_timestamp] has different index values", "mapper [_timestamp] has different store values", "Cannot update default in _timestamp value. Value is 1970-01-01 now encountering 1970-01-02", "Cannot update path in _timestamp value. Value is foo path in merged mapping is bar"};
for (String conflict : expectedConflicts) {
assertThat(e.getDetailedMessage(), containsString(conflict));
}
}
compareMappingOnNodes(mappingsBeforeUpdateResponse);
}
}

View File

@ -19,7 +19,9 @@
package org.elasticsearch.index.mapper.update;
import org.elasticsearch.Version;
import org.elasticsearch.action.admin.indices.mapping.get.GetMappingsResponse;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.common.compress.CompressedXContent;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentBuilder;
@ -132,7 +134,7 @@ public class UpdateMappingTests extends ElasticsearchSingleNodeTest {
@Test
public void testTimestampParsing() throws IOException {
IndexService indexService = createIndex("test", Settings.settingsBuilder().build());
IndexService indexService = createIndex("test", Settings.settingsBuilder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.V_1_4_2.id).build());
XContentBuilder indexMapping = XContentFactory.jsonBuilder();
boolean enabled = randomBoolean();
indexMapping.startObject()

View File

@ -1316,8 +1316,7 @@ public class DateHistogramTests extends ElasticsearchIntegrationTest {
}
}
@AwaitsFix(bugUrl="https://github.com/elastic/elasticsearch/pull/11482")
public void testTimestampField() {
public void testTimestampField() { // see #11692
SearchResponse response = client().prepareSearch("idx").addAggregation(dateHistogram("histo").field("_timestamp").interval(randomFrom(DateHistogramInterval.DAY, DateHistogramInterval.MONTH))).get();
assertSearchResponse(response);
Histogram histo = response.getAggregations().get("histo");

View File

@ -50,6 +50,7 @@ public class PipelineAggregationHelperTests extends ElasticsearchTestCase {
ArrayList<MockBucket> values = new ArrayList<>(size);
boolean lastWasGap = false;
boolean emptyHisto = true;
for (int i = 0; i < size; i++) {
MockBucket bucket = new MockBucket();
@ -70,15 +71,27 @@ public class PipelineAggregationHelperTests extends ElasticsearchTestCase {
bucket.count = randomIntBetween(1, 50);
bucket.docValues = new double[bucket.count];
for (int j = 0; j < bucket.count; j++) {
bucket.docValues[j] = randomDouble() * randomIntBetween(-20,20);
bucket.docValues[j] = randomDouble() * randomIntBetween(-20, 20);
}
lastWasGap = false;
emptyHisto = false;
}
bucket.key = i * interval;
values.add(bucket);
}
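// guarantee at least one populated bucket so the generated histogram is never entirely empty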
if (emptyHisto) {
int idx = randomIntBetween(0, values.size() - 1);
MockBucket bucket = values.get(idx);
bucket.count = randomIntBetween(1, 50);
bucket.docValues = new double[bucket.count];
for (int j = 0; j < bucket.count; j++) {
bucket.docValues[j] = randomDouble() * randomIntBetween(-20, 20);
}
values.set(idx, bucket);
}
return values;
}

View File

@ -148,15 +148,6 @@ public class MovAvgTests extends ElasticsearchIntegrationTest {
}
}
// Used for specially crafted gap tests
builders.add(client().prepareIndex("idx", "gap_type").setSource(jsonBuilder().startObject()
.field(INTERVAL_FIELD, 0)
.field(GAP_FIELD, 1).endObject()));
builders.add(client().prepareIndex("idx", "gap_type").setSource(jsonBuilder().startObject()
.field(INTERVAL_FIELD, 49)
.field(GAP_FIELD, 1).endObject()));
for (int i = -10; i < 10; i++) {
builders.add(client().prepareIndex("neg_idx", "type").setSource(
jsonBuilder().startObject().field(INTERVAL_FIELD, i).field(VALUE_FIELD, 10).endObject()));
@ -204,31 +195,36 @@ public class MovAvgTests extends ElasticsearchIntegrationTest {
metricValue = target.equals(MetricTarget.VALUE) ? PipelineAggregationHelperTests.calculateMetric(docValues, metric) : mockBucket.count;
}
window.offer(metricValue);
switch (type) {
case SIMPLE:
values.add(simple(window));
break;
case LINEAR:
values.add(linear(window));
break;
case EWMA:
values.add(ewma(window));
break;
case HOLT:
values.add(holt(window));
break;
case HOLT_WINTERS:
// HW needs at least 2 periods of data to start
if (window.size() >= period * 2) {
values.add(holtWinters(window));
} else {
values.add(null);
}
if (window.size() > 0) {
switch (type) {
case SIMPLE:
values.add(simple(window));
break;
case LINEAR:
values.add(linear(window));
break;
case EWMA:
values.add(ewma(window));
break;
case HOLT:
values.add(holt(window));
break;
case HOLT_WINTERS:
// HW needs at least 2 periods of data to start
if (window.size() >= period * 2) {
values.add(holtWinters(window));
} else {
values.add(null);
}
break;
break;
}
} else {
values.add(null);
}
window.offer(metricValue);
}
testValues.put(type.toString() + "_" + target.toString(), values);
}
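The helpers simple/linear/ewma/holt referenced in the switch are the test's own reference implementations, defined elsewhere in this file; for orientation, a sketch of what the two simplest ones compute over the window (assuming Guava's EvictingQueue):

import com.google.common.collect.EvictingQueue;

class MovAvgReferenceSketch {
    // simple: arithmetic mean of everything currently in the window
    static double simple(EvictingQueue<Double> window) {
        double sum = 0;
        for (double v : window) {
            sum += v;
        }
        return sum / window.size();
    }

    // linear: linearly weighted mean; the oldest value gets weight 1, the newest weight n
    static double linear(EvictingQueue<Double> window) {
        double avg = 0;
        long totalWeight = 1; // mirrors the model under test, which seeds the weight at 1
        long current = 1;
        for (double v : window) {
            avg += v * current;
            totalWeight += current;
            current += 1;
        }
        return avg / totalWeight;
    }
}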
@ -685,7 +681,10 @@ public class MovAvgTests extends ElasticsearchIntegrationTest {
List<? extends Bucket> buckets = histo.getBuckets();
assertThat("Size of buckets array is not correct.", buckets.size(), equalTo(25));
for (int i = 0; i < 20; i++) {
SimpleValue current = buckets.get(0).getAggregations().get("movavg_values");
assertThat(current, nullValue());
for (int i = 1; i < 20; i++) {
Bucket bucket = buckets.get(i);
assertThat(bucket, notNullValue());
assertThat((long) bucket.getKey(), equalTo((long) i - 10));
@ -699,7 +698,6 @@ public class MovAvgTests extends ElasticsearchIntegrationTest {
}
for (int i = 20; i < 25; i++) {
System.out.println(i);
Bucket bucket = buckets.get(i);
assertThat(bucket, notNullValue());
assertThat((long) bucket.getKey(), equalTo((long) i - 10));
@ -877,350 +875,6 @@ public class MovAvgTests extends ElasticsearchIntegrationTest {
}
}
/**
* This test uses the "gap" dataset, which is simply a doc at the beginning and end of
* the INTERVAL_FIELD range. These docs have a value of 1 in GAP_FIELD.
* This test verifies that large gaps don't break things, and that the mov avg roughly works
* in the correct manner (checks direction of change, but not actual values)
*/
@Test
public void testGiantGap() {
SearchResponse response = client()
.prepareSearch("idx").setTypes("gap_type")
.addAggregation(
histogram("histo").field(INTERVAL_FIELD).interval(1).extendedBounds(0L, 49L)
.subAggregation(min("the_metric").field(GAP_FIELD))
.subAggregation(movingAvg("movavg_values")
.window(windowSize)
.modelBuilder(randomModelBuilder())
.gapPolicy(gapPolicy)
.setBucketsPaths("the_metric"))
).execute().actionGet();
assertSearchResponse(response);
InternalHistogram<Bucket> histo = response.getAggregations().get("histo");
assertThat(histo, notNullValue());
assertThat(histo.getName(), equalTo("histo"));
List<? extends Bucket> buckets = histo.getBuckets();
assertThat("Size of buckets array is not correct.", buckets.size(), equalTo(50));
double lastValue = ((SimpleValue)(buckets.get(0).getAggregations().get("movavg_values"))).value();
assertThat(Double.compare(lastValue, 0.0d), greaterThanOrEqualTo(0));
double currentValue;
for (int i = 1; i < 49; i++) {
SimpleValue current = buckets.get(i).getAggregations().get("movavg_values");
if (current != null) {
currentValue = current.value();
// Since there are only two values in this test, at the beginning and end, the moving average should
// decrease every step (until it reaches zero). Crude way to check that it's doing the right thing
// without actually verifying the computed values. Should work for all types of moving avgs and
// gap policies
assertThat(Double.compare(lastValue, currentValue), greaterThanOrEqualTo(0));
lastValue = currentValue;
}
}
SimpleValue current = buckets.get(49).getAggregations().get("movavg_values");
assertThat(current, notNullValue());
currentValue = current.value();
if (gapPolicy.equals(BucketHelpers.GapPolicy.SKIP)) {
// if we are ignoring, movavg could go up (holt) or stay the same (simple, linear, ewma)
assertThat(Double.compare(lastValue, currentValue), lessThanOrEqualTo(0));
} else if (gapPolicy.equals(BucketHelpers.GapPolicy.INSERT_ZEROS)) {
// If we insert zeros, this should always increase the moving avg since the last bucket has a real value
assertThat(Double.compare(lastValue, currentValue), equalTo(-1));
}
}
/**
* Big gap, but with prediction at the end.
*/
@Test
public void testGiantGapWithPredict() {
int numPredictions = randomIntBetween(1, 10);
SearchResponse response = client()
.prepareSearch("idx").setTypes("gap_type")
.addAggregation(
histogram("histo").field(INTERVAL_FIELD).interval(1).extendedBounds(0L, 49L)
.subAggregation(min("the_metric").field(GAP_FIELD))
.subAggregation(movingAvg("movavg_values")
.window(windowSize)
.modelBuilder(randomModelBuilder())
.gapPolicy(gapPolicy)
.setBucketsPaths("the_metric")
.predict(numPredictions))
).execute().actionGet();
assertSearchResponse(response);
InternalHistogram<Bucket> histo = response.getAggregations().get("histo");
assertThat(histo, notNullValue());
assertThat(histo.getName(), equalTo("histo"));
List<? extends Bucket> buckets = histo.getBuckets();
assertThat("Size of buckets array is not correct.", buckets.size(), equalTo(50 + numPredictions));
double lastValue = ((SimpleValue)(buckets.get(0).getAggregations().get("movavg_values"))).value();
assertThat(Double.compare(lastValue, 0.0d), greaterThanOrEqualTo(0));
double currentValue;
for (int i = 1; i < 49; i++) {
SimpleValue current = buckets.get(i).getAggregations().get("movavg_values");
if (current != null) {
currentValue = current.value();
// Since there are only two values in this test, at the beginning and end, the moving average should
// decrease every step (until it reaches zero). Crude way to check that it's doing the right thing
// without actually verifying the computed values. Should work for all types of moving avgs and
// gap policies
assertThat(Double.compare(lastValue, currentValue), greaterThanOrEqualTo(0));
lastValue = currentValue;
}
}
SimpleValue current = buckets.get(49).getAggregations().get("movavg_values");
assertThat(current, notNullValue());
currentValue = current.value();
if (gapPolicy.equals(BucketHelpers.GapPolicy.SKIP)) {
// if we are ignoring, movavg could go up (holt) or stay the same (simple, linear, ewma)
assertThat(Double.compare(lastValue, currentValue), lessThanOrEqualTo(0));
} else if (gapPolicy.equals(BucketHelpers.GapPolicy.INSERT_ZEROS)) {
// If we insert zeros, this should always increase the moving avg since the last bucket has a real value
assertThat(Double.compare(lastValue, currentValue), equalTo(-1));
}
// Now check predictions
for (int i = 50; i < 50 + numPredictions; i++) {
// Unclear at this point which direction the predictions will go, just verify they are
// not null, and that we don't have the_metric anymore
assertThat((buckets.get(i).getAggregations().get("movavg_values")), notNullValue());
assertThat((buckets.get(i).getAggregations().get("the_metric")), nullValue());
}
}
/**
* This test filters the "gap" data so that the first doc is excluded. This leaves a long stretch of empty
* buckets until the final bucket. The moving avg should be zero up until the last bucket, and should work
* regardless of mov avg type or gap policy.
*/
@Test
public void testLeftGap() {
SearchResponse response = client()
.prepareSearch("idx").setTypes("gap_type")
.addAggregation(
filter("filtered").filter(new RangeQueryBuilder(INTERVAL_FIELD).from(1)).subAggregation(
histogram("histo").field(INTERVAL_FIELD).interval(1).extendedBounds(0L, 49L)
.subAggregation(randomMetric("the_metric", GAP_FIELD))
.subAggregation(movingAvg("movavg_values")
.window(windowSize)
.modelBuilder(randomModelBuilder())
.gapPolicy(gapPolicy)
.setBucketsPaths("the_metric"))
))
.execute().actionGet();
assertSearchResponse(response);
InternalFilter filtered = response.getAggregations().get("filtered");
assertThat(filtered, notNullValue());
assertThat(filtered.getName(), equalTo("filtered"));
InternalHistogram<Bucket> histo = filtered.getAggregations().get("histo");
assertThat(histo, notNullValue());
assertThat(histo.getName(), equalTo("histo"));
List<? extends Bucket> buckets = histo.getBuckets();
assertThat("Size of buckets array is not correct.", buckets.size(), equalTo(50));
double lastValue = 0;
double currentValue;
for (int i = 0; i < 50; i++) {
SimpleValue current = buckets.get(i).getAggregations().get("movavg_values");
if (current != null) {
currentValue = current.value();
assertThat(Double.compare(lastValue, currentValue), lessThanOrEqualTo(0));
lastValue = currentValue;
}
}
}
@Test
public void testLeftGapWithPredict() {
int numPredictions = randomIntBetween(1, 10);
SearchResponse response = client()
.prepareSearch("idx").setTypes("gap_type")
.addAggregation(
filter("filtered").filter(new RangeQueryBuilder(INTERVAL_FIELD).from(1)).subAggregation(
histogram("histo").field(INTERVAL_FIELD).interval(1).extendedBounds(0L, 49L)
.subAggregation(randomMetric("the_metric", GAP_FIELD))
.subAggregation(movingAvg("movavg_values")
.window(windowSize)
.modelBuilder(randomModelBuilder())
.gapPolicy(gapPolicy)
.setBucketsPaths("the_metric")
.predict(numPredictions))
))
.execute().actionGet();
assertSearchResponse(response);
InternalFilter filtered = response.getAggregations().get("filtered");
assertThat(filtered, notNullValue());
assertThat(filtered.getName(), equalTo("filtered"));
InternalHistogram<Bucket> histo = filtered.getAggregations().get("histo");
assertThat(histo, notNullValue());
assertThat(histo.getName(), equalTo("histo"));
List<? extends Bucket> buckets = histo.getBuckets();
assertThat("Size of buckets array is not correct.", buckets.size(), equalTo(50 + numPredictions));
double lastValue = 0;
double currentValue;
for (int i = 0; i < 50; i++) {
SimpleValue current = buckets.get(i).getAggregations().get("movavg_values");
if (current != null) {
currentValue = current.value();
assertThat(Double.compare(lastValue, currentValue), lessThanOrEqualTo(0));
lastValue = currentValue;
}
}
// Now check predictions
for (int i = 50; i < 50 + numPredictions; i++) {
// Unclear at this point which direction the predictions will go, just verify they are
// not null, and that we don't have the_metric anymore
assertThat((buckets.get(i).getAggregations().get("movavg_values")), notNullValue());
assertThat((buckets.get(i).getAggregations().get("the_metric")), nullValue());
}
}
/**
* This test filters the "gap" data so that the last doc is excluded. This leaves a long stretch of empty
* buckets after the first bucket.
*/
@Test
public void testRightGap() {
SearchResponse response = client()
.prepareSearch("idx").setTypes("gap_type")
.addAggregation(
filter("filtered").filter(new RangeQueryBuilder(INTERVAL_FIELD).to(1)).subAggregation(
histogram("histo").field(INTERVAL_FIELD).interval(1).extendedBounds(0L, 49L)
.subAggregation(randomMetric("the_metric", GAP_FIELD))
.subAggregation(movingAvg("movavg_values")
.window(windowSize)
.modelBuilder(randomModelBuilder())
.gapPolicy(gapPolicy)
.setBucketsPaths("the_metric"))
))
.execute().actionGet();
assertSearchResponse(response);
InternalFilter filtered = response.getAggregations().get("filtered");
assertThat(filtered, notNullValue());
assertThat(filtered.getName(), equalTo("filtered"));
InternalHistogram<Bucket> histo = filtered.getAggregations().get("histo");
assertThat(histo, notNullValue());
assertThat(histo.getName(), equalTo("histo"));
List<? extends Bucket> buckets = histo.getBuckets();
assertThat("Size of buckets array is not correct.", buckets.size(), equalTo(50));
SimpleValue current = buckets.get(0).getAggregations().get("movavg_values");
assertThat(current, notNullValue());
double lastValue = current.value();
double currentValue;
for (int i = 1; i < 50; i++) {
current = buckets.get(i).getAggregations().get("movavg_values");
if (current != null) {
currentValue = current.value();
assertThat(Double.compare(lastValue, currentValue), greaterThanOrEqualTo(0));
lastValue = currentValue;
}
}
}
@Test
public void testRightGapWithPredict() {
int numPredictions = randomIntBetween(1, 10);
SearchResponse response = client()
.prepareSearch("idx").setTypes("gap_type")
.addAggregation(
filter("filtered").filter(new RangeQueryBuilder(INTERVAL_FIELD).to(1)).subAggregation(
histogram("histo").field(INTERVAL_FIELD).interval(1).extendedBounds(0L, 49L)
.subAggregation(randomMetric("the_metric", GAP_FIELD))
.subAggregation(movingAvg("movavg_values")
.window(windowSize)
.modelBuilder(randomModelBuilder())
.gapPolicy(gapPolicy)
.setBucketsPaths("the_metric")
.predict(numPredictions))
))
.execute().actionGet();
assertSearchResponse(response);
InternalFilter filtered = response.getAggregations().get("filtered");
assertThat(filtered, notNullValue());
assertThat(filtered.getName(), equalTo("filtered"));
InternalHistogram<Bucket> histo = filtered.getAggregations().get("histo");
assertThat(histo, notNullValue());
assertThat(histo.getName(), equalTo("histo"));
List<? extends Bucket> buckets = histo.getBuckets();
// If we are skipping, predictions appear only at the very beginning and no new buckets are appended
if (gapPolicy.equals(BucketHelpers.GapPolicy.SKIP)) {
assertThat("Size of buckets array is not correct.", buckets.size(), equalTo(50));
} else {
assertThat("Size of buckets array is not correct.", buckets.size(), equalTo(50 + numPredictions));
}
// Unlike left-gap tests, we cannot check the slope of prediction for right-gap. E.g. linear will
// converge on zero, but holt-linear may trend upwards based on the first value
// Just check for non-nullness
SimpleValue current = buckets.get(0).getAggregations().get("movavg_values");
assertThat(current, notNullValue());
// If we are skipping, predictions appear only at the very beginning and no new buckets are appended
if (gapPolicy.equals(BucketHelpers.GapPolicy.SKIP)) {
// Now check predictions
for (int i = 1; i < 1 + numPredictions; i++) {
// Unclear at this point which direction the predictions will go, just verify they are
// not null
assertThat(buckets.get(i).getDocCount(), equalTo(0L));
assertThat((buckets.get(i).getAggregations().get("movavg_values")), notNullValue());
}
} else {
// Otherwise we'll have some predictions at the end
for (int i = 50; i < 50 + numPredictions; i++) {
// Unclear at this point which direction the predictions will go, just verify they are
// not null
assertThat(buckets.get(i).getDocCount(), equalTo(0L));
assertThat((buckets.get(i).getAggregations().get("movavg_values")), notNullValue());
}
}
}
@Test
public void testHoltWintersNotEnoughData() {
try {
@ -1288,8 +942,7 @@ public class MovAvgTests extends ElasticsearchIntegrationTest {
assertThat(avgAgg.value(), equalTo(10d));
SimpleValue movAvgAgg = bucket.getAggregations().get("avg_movavg");
assertThat(movAvgAgg, notNullValue());
assertThat(movAvgAgg.value(), equalTo(10d));
assertThat(movAvgAgg, nullValue());
Derivative deriv = bucket.getAggregations().get("deriv");
assertThat(deriv, nullValue());
@ -1297,7 +950,28 @@ public class MovAvgTests extends ElasticsearchIntegrationTest {
SimpleValue derivMovAvg = bucket.getAggregations().get("deriv_movavg");
assertThat(derivMovAvg, nullValue());
for (int i = 1; i < 12; i++) {
// Second bucket
bucket = buckets.get(1);
assertThat(bucket, notNullValue());
assertThat((long) bucket.getKey(), equalTo(1L));
assertThat(bucket.getDocCount(), equalTo(1L));
avgAgg = bucket.getAggregations().get("avg");
assertThat(avgAgg, notNullValue());
assertThat(avgAgg.value(), equalTo(10d));
deriv = bucket.getAggregations().get("deriv");
assertThat(deriv, notNullValue());
assertThat(deriv.value(), equalTo(0d));
movAvgAgg = bucket.getAggregations().get("avg_movavg");
assertThat(movAvgAgg, notNullValue());
assertThat(movAvgAgg.value(), equalTo(10d));
derivMovAvg = bucket.getAggregations().get("deriv_movavg");
assertThat(derivMovAvg, Matchers.nullValue()); // still null because of movavg delay
for (int i = 2; i < 12; i++) {
bucket = buckets.get(i);
assertThat(bucket, notNullValue());
assertThat((long) bucket.getKey(), equalTo((long) i));

View File

@ -21,7 +21,6 @@ package org.elasticsearch.search.aggregations.pipeline.moving.avg;
import com.google.common.collect.EvictingQueue;
import org.elasticsearch.search.SearchParseException;
import org.elasticsearch.search.aggregations.pipeline.movavg.models.*;
import org.elasticsearch.test.ElasticsearchTestCase;
@ -47,7 +46,10 @@ public class MovAvgUnitTests extends ElasticsearchTestCase {
double randValue = randomDouble();
double expected = 0;
window.offer(randValue);
if (i == 0) {
window.offer(randValue);
continue;
}
for (double value : window) {
expected += value;
@ -56,6 +58,7 @@ public class MovAvgUnitTests extends ElasticsearchTestCase {
double actual = model.next(window);
assertThat(Double.compare(expected, actual), equalTo(0));
window.offer(randValue);
}
}
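The reordered offer/assert pattern above (and in the EWMA, linear and Holt tests below) encodes the contract that next(window) forecasts one step past the current window, so the expectation must be computed before the new observation is offered. Compressed, with expectedFrom standing in for the per-model reference computation:

for (int i = 0; i < numValues; i++) {
    double observed = randomDouble();
    if (i > 0) {
        // the forecast sees the window as it was before this observation
        double expected = expectedFrom(window); // placeholder, not a real helper
        assertThat(Double.compare(expected, model.next(window)), equalTo(0));
    }
    window.offer(observed);
}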
@ -64,7 +67,7 @@ public class MovAvgUnitTests extends ElasticsearchTestCase {
MovAvgModel model = new SimpleModel();
int windowSize = randomIntBetween(1, 50);
int numPredictions = randomIntBetween(1,50);
int numPredictions = randomIntBetween(1, 50);
EvictingQueue<Double> window = EvictingQueue.create(windowSize);
for (int i = 0; i < windowSize; i++) {
@ -73,13 +76,12 @@ public class MovAvgUnitTests extends ElasticsearchTestCase {
double actual[] = model.predict(window, numPredictions);
double expected[] = new double[numPredictions];
for (int i = 0; i < numPredictions; i++) {
for (double value : window) {
expected[i] += value;
}
expected[i] /= window.size();
window.offer(expected[i]);
double t = 0;
for (double value : window) {
t += value;
}
t /= window.size();
Arrays.fill(expected, t);
for (int i = 0; i < numPredictions; i++) {
assertThat(Double.compare(expected[i], actual[i]), equalTo(0));
@ -96,7 +98,11 @@ public class MovAvgUnitTests extends ElasticsearchTestCase {
EvictingQueue<Double> window = EvictingQueue.create(windowSize);
for (int i = 0; i < numValues; i++) {
double randValue = randomDouble();
window.offer(randValue);
if (i == 0) {
window.offer(randValue);
continue;
}
double avg = 0;
long totalWeight = 1;
@ -110,6 +116,7 @@ public class MovAvgUnitTests extends ElasticsearchTestCase {
double expected = avg / totalWeight;
double actual = model.next(window);
assertThat(Double.compare(expected, actual), equalTo(0));
window.offer(randValue);
}
}
@ -127,19 +134,17 @@ public class MovAvgUnitTests extends ElasticsearchTestCase {
double actual[] = model.predict(window, numPredictions);
double expected[] = new double[numPredictions];
for (int i = 0; i < numPredictions; i++) {
double avg = 0;
long totalWeight = 1;
long current = 1;
double avg = 0;
long totalWeight = 1;
long current = 1;
for (double value : window) {
avg += value * current;
totalWeight += current;
current += 1;
}
expected[i] = avg / totalWeight;
window.offer(expected[i]);
for (double value : window) {
avg += value * current;
totalWeight += current;
current += 1;
}
avg = avg / totalWeight;
Arrays.fill(expected, avg);
for (int i = 0; i < numPredictions; i++) {
assertThat(Double.compare(expected[i], actual[i]), equalTo(0));
@ -157,7 +162,11 @@ public class MovAvgUnitTests extends ElasticsearchTestCase {
EvictingQueue<Double> window = EvictingQueue.create(windowSize);
for (int i = 0; i < numValues; i++) {
double randValue = randomDouble();
window.offer(randValue);
if (i == 0) {
window.offer(randValue);
continue;
}
double avg = 0;
boolean first = true;
@ -173,6 +182,7 @@ public class MovAvgUnitTests extends ElasticsearchTestCase {
double expected = avg;
double actual = model.next(window);
assertThat(Double.compare(expected, actual), equalTo(0));
window.offer(randValue);
}
}
@ -191,21 +201,18 @@ public class MovAvgUnitTests extends ElasticsearchTestCase {
double actual[] = model.predict(window, numPredictions);
double expected[] = new double[numPredictions];
for (int i = 0; i < numPredictions; i++) {
double avg = 0;
boolean first = true;
double avg = 0;
boolean first = true;
for (double value : window) {
if (first) {
avg = value;
first = false;
} else {
avg = (value * alpha) + (avg * (1 - alpha));
}
for (double value : window) {
if (first) {
avg = value;
first = false;
} else {
avg = (value * alpha) + (avg * (1 - alpha));
}
expected[i] = avg;
window.offer(expected[i]);
}
Arrays.fill(expected, avg);
for (int i = 0; i < numPredictions; i++) {
assertThat(Double.compare(expected[i], actual[i]), equalTo(0));
@ -224,7 +231,11 @@ public class MovAvgUnitTests extends ElasticsearchTestCase {
EvictingQueue<Double> window = EvictingQueue.create(windowSize);
for (int i = 0; i < numValues; i++) {
double randValue = randomDouble();
window.offer(randValue);
if (i == 0) {
window.offer(randValue);
continue;
}
double s = 0;
double last_s = 0;
@ -253,6 +264,7 @@ public class MovAvgUnitTests extends ElasticsearchTestCase {
double expected = s + (0 * b);
double actual = model.next(window);
assertThat(Double.compare(expected, actual), equalTo(0));
window.offer(randValue);
}
}
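The predict tests for the flat models (simple, linear, EWMA) all collapse the same way: no new observations arrive while predicting, so every forecast step sees the same window and the expected array is one value repeated. A generic sketch of that reduction, with ToDoubleFunction standing in for any of the reference computations:

import java.util.Arrays;
import java.util.function.ToDoubleFunction;
import com.google.common.collect.EvictingQueue;

class FlatPredictSketch {
    static double[] expectedPredictions(EvictingQueue<Double> window, int n,
                                        ToDoubleFunction<EvictingQueue<Double>> reference) {
        double[] expected = new double[n];
        // the window never changes during prediction, so the forecast is constant
        Arrays.fill(expected, reference.applyAsDouble(window));
        return expected;
    }
}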

View File

@ -33,15 +33,9 @@ import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.index.mapper.MapperParsingException;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.*;
import org.elasticsearch.index.query.CommonTermsQueryBuilder.Operator;
import org.elasticsearch.index.query.MatchQueryBuilder;
import org.elasticsearch.index.query.MatchQueryBuilder.Type;
import org.elasticsearch.index.query.MultiMatchQueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.index.query.QueryStringQueryBuilder;
import org.elasticsearch.index.query.TermQueryBuilder;
import org.elasticsearch.index.query.WrapperQueryBuilder;
import org.elasticsearch.rest.RestStatus;
import org.elasticsearch.script.Script;
import org.elasticsearch.search.SearchHit;
@ -62,58 +56,11 @@ import java.util.concurrent.ExecutionException;
import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_SHARDS;
import static org.elasticsearch.common.settings.Settings.settingsBuilder;
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
import static org.elasticsearch.index.query.QueryBuilders.andQuery;
import static org.elasticsearch.index.query.QueryBuilders.boolQuery;
import static org.elasticsearch.index.query.QueryBuilders.commonTermsQuery;
import static org.elasticsearch.index.query.QueryBuilders.constantScoreQuery;
import static org.elasticsearch.index.query.QueryBuilders.existsQuery;
import static org.elasticsearch.index.query.QueryBuilders.filteredQuery;
import static org.elasticsearch.index.query.QueryBuilders.functionScoreQuery;
import static org.elasticsearch.index.query.QueryBuilders.fuzzyQuery;
import static org.elasticsearch.index.query.QueryBuilders.hasChildQuery;
import static org.elasticsearch.index.query.QueryBuilders.idsQuery;
import static org.elasticsearch.index.query.QueryBuilders.indicesQuery;
import static org.elasticsearch.index.query.QueryBuilders.limitQuery;
import static org.elasticsearch.index.query.QueryBuilders.matchAllQuery;
import static org.elasticsearch.index.query.QueryBuilders.matchQuery;
import static org.elasticsearch.index.query.QueryBuilders.missingQuery;
import static org.elasticsearch.index.query.QueryBuilders.multiMatchQuery;
import static org.elasticsearch.index.query.QueryBuilders.notQuery;
import static org.elasticsearch.index.query.QueryBuilders.prefixQuery;
import static org.elasticsearch.index.query.QueryBuilders.queryStringQuery;
import static org.elasticsearch.index.query.QueryBuilders.rangeQuery;
import static org.elasticsearch.index.query.QueryBuilders.regexpQuery;
import static org.elasticsearch.index.query.QueryBuilders.spanMultiTermQueryBuilder;
import static org.elasticsearch.index.query.QueryBuilders.spanNearQuery;
import static org.elasticsearch.index.query.QueryBuilders.spanNotQuery;
import static org.elasticsearch.index.query.QueryBuilders.spanOrQuery;
import static org.elasticsearch.index.query.QueryBuilders.spanTermQuery;
import static org.elasticsearch.index.query.QueryBuilders.termQuery;
import static org.elasticsearch.index.query.QueryBuilders.termsLookupQuery;
import static org.elasticsearch.index.query.QueryBuilders.termsQuery;
import static org.elasticsearch.index.query.QueryBuilders.typeQuery;
import static org.elasticsearch.index.query.QueryBuilders.wildcardQuery;
import static org.elasticsearch.index.query.QueryBuilders.wrapperQuery;
import static org.elasticsearch.index.query.QueryBuilders.*;
import static org.elasticsearch.index.query.functionscore.ScoreFunctionBuilders.scriptFunction;
import static org.elasticsearch.test.VersionUtils.randomVersion;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertFailures;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertFirstHit;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoFailures;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchHit;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchHits;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchResponse;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSecondHit;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertThirdHit;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.hasId;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.hasScore;
import static org.hamcrest.Matchers.allOf;
import static org.hamcrest.Matchers.closeTo;
import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.greaterThan;
import static org.hamcrest.Matchers.is;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.*;
import static org.hamcrest.Matchers.*;
@Slow
public class SearchQueryTests extends ElasticsearchIntegrationTest {
@ -2195,11 +2142,10 @@ functionScoreQuery(scriptFunction(new Script("_doc['score'].value")))).setMinSco
}
}
@AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/11478")
@Test
public void testDateProvidedAsNumber() throws ExecutionException, InterruptedException {
createIndex("test");
assertAcked(client().admin().indices().preparePutMapping("test").setType("type").setSource("field", "type=date").get());
assertAcked(client().admin().indices().preparePutMapping("test").setType("type").setSource("field", "type=date,format=epoch_millis").get());
indexRandom(true, client().prepareIndex("test", "type", "1").setSource("field", -1000000000001L),
client().prepareIndex("test", "type", "2").setSource("field", -1000000000000L),
client().prepareIndex("test", "type", "3").setSource("field", -999999999999L));

View File

@ -1562,10 +1562,8 @@ public class SimpleSortTests extends ElasticsearchIntegrationTest {
}
public void testSortMetaField() throws Exception {
final boolean idDocValues = random().nextBoolean();
final boolean timestampDocValues = random().nextBoolean();
XContentBuilder mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("_timestamp").field("enabled", true).field("store", true).field("index", !timestampDocValues || randomBoolean() ? "not_analyzed" : "no").field("doc_values", timestampDocValues).endObject()
.startObject("_timestamp").field("enabled", true).endObject()
.endObject().endObject();
assertAcked(prepareCreate("test")
.addMapping("type", mapping));

View File

@ -373,12 +373,8 @@ public abstract class ElasticsearchIntegrationTest extends ElasticsearchTestCase
if (frequently() && randomDynamicTemplates()) {
mappings = XContentFactory.jsonBuilder().startObject().startObject("_default_");
if (randomBoolean()) {
boolean timestampEnabled = randomBoolean();
mappings.startObject(TimestampFieldMapper.NAME)
.field("enabled", timestampEnabled);
if (timestampEnabled) {
mappings.field("doc_values", randomBoolean());
}
.field("enabled", randomBoolean());
mappings.endObject();
}
if (randomBoolean()) {

View File

@ -600,4 +600,13 @@ public abstract class ElasticsearchTestCase extends LuceneTestCase {
return list.subList(0, size);
}
/**
* Returns true iff assertions for elasticsearch packages are enabled
*/
public static boolean assertionsEnabled() {
boolean enabled = false;
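// side effect: the assignment only executes when the JVM runs with assertions enabled (-ea); otherwise 'enabled' stays false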
assert (enabled = true);
return enabled;
}
}

View File

@ -18,7 +18,6 @@
*/
package org.elasticsearch.test.cluster;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.Version;
import org.elasticsearch.cluster.*;
import org.elasticsearch.cluster.block.ClusterBlock;
@ -135,6 +134,11 @@ public class NoopClusterService implements ClusterService {
return 0;
}
@Override
public TimeValue getMaxTaskWaitTime() {
return TimeValue.timeValueMillis(0);
}
@Override
public Lifecycle.State lifecycleState() {
return null;

View File

@ -192,6 +192,11 @@ public class TestClusterService implements ClusterService {
throw new UnsupportedOperationException();
}
@Override
public TimeValue getMaxTaskWaitTime() {
throw new UnsupportedOperationException();
}
@Override
public Lifecycle.State lifecycleState() {
throw new UnsupportedOperationException();

View File

@ -46,7 +46,7 @@ public class SimpleTimestampTests extends ElasticsearchIntegrationTest {
public void testSimpleTimestamp() throws Exception {
client().admin().indices().prepareCreate("test")
.addMapping("type1", jsonBuilder().startObject().startObject("type1").startObject("_timestamp").field("enabled", true).field("store", "yes").endObject().endObject().endObject())
.addMapping("type1", jsonBuilder().startObject().startObject("type1").startObject("_timestamp").field("enabled", true).endObject().endObject().endObject())
.execute().actionGet();
client().admin().cluster().prepareHealth().setWaitForEvents(Priority.LANGUID).setWaitForGreenStatus().execute().actionGet();
@ -119,14 +119,14 @@ public class SimpleTimestampTests extends ElasticsearchIntegrationTest {
String index = "foo";
String type = "mytype";
XContentBuilder builder = jsonBuilder().startObject().startObject("_timestamp").field("enabled", true).field("store", true).endObject().endObject();
XContentBuilder builder = jsonBuilder().startObject().startObject("_timestamp").field("enabled", true).endObject().endObject();
assertAcked(client().admin().indices().prepareCreate(index).addMapping(type, builder));
// check mapping again
assertTimestampMappingEnabled(index, type, true);
// update some field in the mapping
XContentBuilder updateMappingBuilder = jsonBuilder().startObject().startObject("_timestamp").field("enabled", false).field("store", true).endObject().endObject();
XContentBuilder updateMappingBuilder = jsonBuilder().startObject().startObject("_timestamp").field("enabled", false).endObject().endObject();
PutMappingResponse putMappingResponse = client().admin().indices().preparePutMapping(index).setType(type).setSource(updateMappingBuilder).get();
assertAcked(putMappingResponse);

View File

@ -69,14 +69,14 @@ public class SimpleTTLTests extends ElasticsearchIntegrationTest {
.addMapping("type1", XContentFactory.jsonBuilder()
.startObject()
.startObject("type1")
.startObject("_timestamp").field("enabled", true).field("store", "yes").endObject()
.startObject("_timestamp").field("enabled", true).endObject()
.startObject("_ttl").field("enabled", true).endObject()
.endObject()
.endObject())
.addMapping("type2", XContentFactory.jsonBuilder()
.startObject()
.startObject("type2")
.startObject("_timestamp").field("enabled", true).field("store", "yes").endObject()
.startObject("_timestamp").field("enabled", true).endObject()
.startObject("_ttl").field("enabled", true).field("default", "1d").endObject()
.endObject()
.endObject()));

View File

@ -70,7 +70,7 @@ public class UpdateTests extends ElasticsearchIntegrationTest {
.addMapping("type1", XContentFactory.jsonBuilder()
.startObject()
.startObject("type1")
.startObject("_timestamp").field("enabled", true).field("store", "yes").endObject()
.startObject("_timestamp").field("enabled", true).endObject()
.startObject("_ttl").field("enabled", true).endObject()
.endObject()
.endObject()));
@ -476,7 +476,7 @@ public class UpdateTests extends ElasticsearchIntegrationTest {
.addMapping("type1", XContentFactory.jsonBuilder()
.startObject()
.startObject("type1")
.startObject("_timestamp").field("enabled", true).field("store", "yes").endObject()
.startObject("_timestamp").field("enabled", true).endObject()
.startObject("_ttl").field("enabled", true).endObject()
.endObject()
.endObject())
@ -484,7 +484,7 @@ public class UpdateTests extends ElasticsearchIntegrationTest {
.startObject()
.startObject("subtype1")
.startObject("_parent").field("type", "type1").endObject()
.startObject("_timestamp").field("enabled", true).field("store", "yes").endObject()
.startObject("_timestamp").field("enabled", true).endObject()
.startObject("_ttl").field("enabled", true).endObject()
.endObject()
.endObject())
@ -634,7 +634,7 @@ public class UpdateTests extends ElasticsearchIntegrationTest {
.addMapping("type1", XContentFactory.jsonBuilder()
.startObject()
.startObject("type1")
.startObject("_timestamp").field("enabled", true).field("store", "yes").endObject()
.startObject("_timestamp").field("enabled", true).endObject()
.startObject("_ttl").field("enabled", true).endObject()
.endObject()
.endObject())

View File

@ -7,7 +7,7 @@ can be cached for faster responses. These cached results are the same results
that would be returned by an uncached aggregation -- you will never get stale
results.
See <<index-modules-shard-query-cache>> for more details.
See <<shard-query-cache>> for more details.
[[returning-only-agg-results]]
== Returning only aggregation results

View File

@ -86,6 +86,12 @@ Will return, for example:
"total": "8gb",
"total_in_bytes": 8589934592
},
"names": [
{
"name": "Mac OS X",
"count": 1
}
],
"cpu": [
{
"vendor": "Intel",

View File

@ -10,8 +10,8 @@ survive a full cluster restart). Here is an example:
curl -XPUT localhost:9200/_cluster/settings -d '{
"persistent" : {
"discovery.zen.minimum_master_nodes" : 2
}
}'
}
}'
--------------------------------------------------
Or:
@ -21,8 +21,8 @@ Or:
curl -XPUT localhost:9200/_cluster/settings -d '{
"transient" : {
"discovery.zen.minimum_master_nodes" : 2
}
}'
}
}'
--------------------------------------------------
The cluster responds with the settings updated. So the response for the
@ -34,8 +34,8 @@ last example will be:
"persistent" : {},
"transient" : {
"discovery.zen.minimum_master_nodes" : "2"
}
}'
}
}
--------------------------------------------------
Cluster wide settings can be returned using:
@ -45,157 +45,7 @@ Cluster wide settings can be returned using:
curl -XGET localhost:9200/_cluster/settings
--------------------------------------------------
There is a specific list of settings that can be updated, those include:
[float]
[[cluster-settings]]
=== Cluster settings
[float]
==== Routing allocation
[float]
===== Awareness
`cluster.routing.allocation.awareness.attributes`::
See <<modules-cluster>>.
`cluster.routing.allocation.awareness.force.*`::
See <<modules-cluster>>.
[float]
===== Balanced Shards
All these values are relative to one another. The first three are used to
compose three separate weighting functions into one. The cluster is balanced
when no allowed action can bring the weights of each node closer together by
more than the fourth setting. Actions might not be allowed, for instance,
due to forced awareness or allocation filtering.
`cluster.routing.allocation.balance.shard`::
Defines the weight factor for shards allocated on a node
(float). Defaults to `0.45f`. Raising this raises the tendency to
equalize the number of shards across all nodes in the cluster.
`cluster.routing.allocation.balance.index`::
Defines a factor to the number of shards per index allocated
on a specific node (float). Defaults to `0.55f`. Raising this raises the
tendency to equalize the number of shards per index across all nodes in
the cluster.
`cluster.routing.allocation.balance.threshold`::
Minimal optimization value of operations that should be performed (non-negative
float). Defaults to `1.0f`. Raising this will cause the cluster
to be less aggressive about optimizing the shard balance.
[float]
===== Concurrent Rebalance
`cluster.routing.allocation.cluster_concurrent_rebalance`::
Controls how many concurrent shard rebalances are
allowed cluster wide. Defaults to `2` (integer). `-1` for
unlimited. See also <<modules-cluster>>.
[float]
===== Enable allocation
`cluster.routing.allocation.enable`::
See <<modules-cluster>>.
[float]
===== Throttling allocation
`cluster.routing.allocation.node_initial_primaries_recoveries`::
See <<modules-cluster>>.
`cluster.routing.allocation.node_concurrent_recoveries`::
See <<modules-cluster>>.
[float]
===== Filter allocation
`cluster.routing.allocation.include.*`::
See <<modules-cluster>>.
`cluster.routing.allocation.exclude.*`::
See <<modules-cluster>>.
`cluster.routing.allocation.require.*`::
See <<modules-cluster>>.
[float]
==== Metadata
`cluster.blocks.read_only`::
Makes the whole cluster read only (indices do not accept write operations) and disallows metadata modifications (creating or deleting indices).
[float]
==== Discovery
`discovery.zen.minimum_master_nodes`::
See <<modules-discovery-zen>>
`discovery.zen.publish_timeout`::
See <<modules-discovery-zen>>
[float]
==== Threadpools
`threadpool.*`::
See <<modules-threadpool>>
[float]
[[cluster-index-settings]]
=== Index settings
[float]
==== Index filter cache
`indices.cache.filter.size`::
See <<index-modules-cache>>
[float]
==== TTL interval
`indices.ttl.interval` (time)::
See <<mapping-ttl-field>>
[float]
==== Recovery
`indices.recovery.concurrent_streams`::
See <<modules-indices>>
`indices.recovery.concurrent_small_file_streams`::
See <<modules-indices>>
`indices.recovery.file_chunk_size`::
See <<modules-indices>>
`indices.recovery.translog_ops`::
See <<modules-indices>>
`indices.recovery.translog_size`::
See <<modules-indices>>
`indices.recovery.compress`::
See <<modules-indices>>
`indices.recovery.max_bytes_per_sec`::
See <<modules-indices>>
[float]
[[logger]]
=== Logger
Logger levels can also be updated dynamically by using the `logger.` prefix. More
settings will be made dynamically updatable over time.
[float]
=== Field data circuit breaker
`indices.breaker.fielddata.limit`::
See <<index-modules-fielddata>>
`indices.breaker.fielddata.overhead`::
See <<index-modules-fielddata>>
A list of dynamically updatable settings can be found in the
<<modules,Modules>> documentation.

View File

@ -1,49 +1,177 @@
[[index-modules]]
= Index Modules
[partintro]
--
Index Modules are modules created per index and control all aspects
related to an index. Since those modules' lifecycles are tied to an index,
all the relevant module settings can be provided when creating an index
(and that is actually the recommended way to configure an index).
Index Modules are modules created per index and control all aspects related to
an index.
[float]
[[index-modules-settings]]
== Index Settings
There are specific index level settings that are not associated with any
specific module. These include:
Index level settings can be set per-index. Settings may be:
_static_::
They can only be set at index creation time or on a
<<indices-open-close,closed index>>.
_dynamic_::
They can be changed on a live index using the
<<indices-update-settings,update-index-settings>> API.
WARNING: Changing static or dynamic index settings on a closed index could
result in incorrect settings that are impossible to rectify without deleting
and recreating the index.
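For example, a sketch (the index name `my_index` and the values are
illustrative): `index.number_of_shards` is _static_ and must be supplied at
index creation time, while `index.number_of_replicas` is _dynamic_ and can be
changed later on the live index:

[source,json]
------------------------
PUT my_index
{
  "settings": {
    "index.number_of_shards": 3
  }
}

PUT my_index/_settings
{
  "index.number_of_replicas": 2
}
------------------------
// AUTOSENSE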
[float]
=== Static index settings
Below is a list of all _static_ index settings that are not associated with any
specific index module:
`index.number_of_shards`::
The number of primary shards that an index should have. Defaults to 5.
This setting can only be set at index creation time. It cannot be
changed on a closed index.
`index.shard.check_on_startup`::
+
--
experimental[] Whether or not shards should be checked for corruption before opening. When
corruption is detected, it will prevent the shard from being opened. Accepts:
`false`::
(default) Don't check for corruption when opening a shard.
`checksum`::
Check for physical corruption.
`true`::
Check for both physical and logical corruption. This is much more
expensive in terms of CPU and memory usage.
`fix`::
Check for both physical and logical corruption. Segments that were reported
as corrupted will be automatically removed. This option *may result in data loss*.
Use with extreme caution!
Checking shards may take a lot of time on large indices.
--
[float]
[[dynamic-index-settings]]
=== Dynamic index settings
Below is a list of all _dynamic_ index settings that are not associated with any
specific index module:
`index.number_of_replicas`::
The number of replicas each primary shard has. Defaults to 1.
`index.auto_expand_replicas`::
Auto-expand the number of replicas based on the number of available nodes.
Set to a dash delimited lower and upper bound (e.g. `0-5`) or use `all`
for the upper bound (e.g. `0-all`). Defaults to `false` (i.e. disabled).
`index.refresh_interval`::
A time setting controlling how often the
refresh operation will be executed. Defaults to `1s`. Can be set to `-1`
in order to disable it.
How often to perform a refresh operation, which makes recent changes to the
index visible to search. Defaults to `1s`. Can be set to `-1` to disable
refresh.
`index.codec`::
experimental[]
The `default` value compresses stored data with LZ4 compression, but
this can be set to `best_compression` for a higher compression ratio,
at the expense of slower stored fields performance.
experimental[] The `default` value compresses stored data with LZ4
compression, but this can be set to `best_compression` for a higher
compression ratio, at the expense of slower stored fields performance.
`index.shard.check_on_startup`::
`index.blocks.read_only`::
experimental[]
Should shard consistency be checked upon opening. When corruption is detected,
it will prevent the shard from being opened.
+
When `checksum`, check for physical corruption.
When `true`, check for both physical and logical corruption. This is much
more expensive in terms of CPU and memory usage.
When `fix`, check for both physical and logical corruption, and segments
that were reported as corrupted will be automatically removed.
Default value is `false`, which performs no checks.
Set to `true` to make the index and index metadata read only, `false` to
allow writes and metadata changes.
NOTE: Checking shards may take a lot of time on large indices.
`index.blocks.read`::
WARNING: Setting `index.shard.check_on_startup` to `fix` may result in data loss,
use with extreme caution.
Set to `true` to disable read operations against the index.
`index.blocks.write`::
Set to `true` to disable write operations against the index.
`index.blocks.metadata`::
Set to `true` to disable index metadata reads and writes.
`index.ttl.disable_purge`::
experimental[] Disables the purge of <<mapping-ttl-field,expired docs>> on
the current index.
[[index.recovery.initial_shards]]`index.recovery.initial_shards`::
+
--
A primary shard is only recovered if there are enough nodes available to
allocate sufficient replicas to form a quorum. It can be set to:
* `quorum` (default)
* `quorum-1` (or `half`)
* `full`
* `full-1`.
* Number values are also supported, e.g. `1`.
--
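As a sketch, the setting could be updated on a live index like this (the index
name `test` is illustrative):

[source,json]
------------------------
PUT test/_settings
{
  "index.recovery.initial_shards": "quorum-1"
}
------------------------
// AUTOSENSE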
[float]
=== Settings in other index modules
Other index settings are available in index modules:
<<analysis,Analysis>>::
Settings to define analyzers, tokenizers, token filters and character
filters.
<<index-modules-allocation,Index shard allocation>>::
Control over where, when, and how shards are allocated to nodes.
<<index-modules-mapper,Mapping>>::
Enable or disable dynamic mapping for an index.
<<index-modules-merge,Merging>>::
Control over how shards are merged by the background merge process.
<<index-modules-similarity,Similarities>>::
Configure custom similarity settings to customize how search results are
scored.
<<index-modules-slowlog,Slowlog>>::
Control over how slow queries and fetch requests are logged.
<<index-modules-store,Store>>::
Configure the type of filesystem used to access shard data.
<<index-modules-translog,Translog>>::
Control over the transaction log and background flush operations.
--
@ -51,22 +179,16 @@ include::index-modules/analysis.asciidoc[]
include::index-modules/allocation.asciidoc[]
include::index-modules/slowlog.asciidoc[]
include::index-modules/mapper.asciidoc[]
include::index-modules/merge.asciidoc[]
include::index-modules/store.asciidoc[]
include::index-modules/similarity.asciidoc[]
include::index-modules/mapper.asciidoc[]
include::index-modules/slowlog.asciidoc[]
include::index-modules/store.asciidoc[]
include::index-modules/translog.asciidoc[]
include::index-modules/cache.asciidoc[]
include::index-modules/query-cache.asciidoc[]
include::index-modules/fielddata.asciidoc[]
include::index-modules/similarity.asciidoc[]

View File

@ -1,168 +1,131 @@
[[index-modules-allocation]]
== Index Shard Allocation
This module provides per-index settings to control the allocation of shards to
nodes.
[float]
[[shard-allocation-filtering]]
=== Shard Allocation Filtering
Allows to control the allocation of indices on nodes based on include/exclude
filters. The filters can be set both on the index level and on the
cluster level. Let's start with an example of setting it on the cluster
level:
Shard allocation filtering allows you to specify which nodes are allowed
to host the shards of a particular index.
Let's say we have 4 nodes, each with a specific attribute called `tag`
associated with it (the name of the attribute can be any name). Each
node has a specific value associated with `tag`. Node 1 has a setting
`node.tag: value1`, Node 2 a setting of `node.tag: value2`, and so on.
NOTE: The per-index shard allocation filters explained below work in
conjunction with the cluster-wide allocation filters explained in
<<shards-allocation>>.
We can create an index that will only deploy on nodes that have `tag`
set to `value1` and `value2` by setting
`index.routing.allocation.include.tag` to `value1,value2`. For example:
It is possible to assign arbitrary metadata attributes to each node at
startup. For instance, nodes could be assigned a `rack` and a `group`
attribute as follows:
[source,js]
--------------------------------------------------
curl -XPUT localhost:9200/test/_settings -d '{
"index.routing.allocation.include.tag" : "value1,value2"
}'
--------------------------------------------------
[source,sh]
------------------------
bin/elasticsearch --node.rack rack1 --node.size big <1>
------------------------
<1> These attribute settings can also be specified in the `elasticsearch.yml` config file.
On the other hand, we can create an index that will be deployed on all
nodes except for nodes with a `tag` of value `value3` by setting
`index.routing.allocation.exclude.tag` to `value3`. For example:
These metadata attributes can be used with the
`index.routing.allocation.*` settings to allocate an index to a particular
group of nodes. For instance, we can move the index `test` to either `big` or
`medium` nodes as follows:
[source,js]
--------------------------------------------------
curl -XPUT localhost:9200/test/_settings -d '{
"index.routing.allocation.exclude.tag" : "value3"
}'
--------------------------------------------------
[source,json]
------------------------
PUT test/_settings
{
"index.routing.allocation.include.size": "big,medium"
}
------------------------
// AUTOSENSE
`index.routing.allocation.require.*` can be used to
specify a number of rules, all of which MUST match in order for a shard
to be allocated to a node. This is in contrast to `include` which will
include a node if ANY rule matches.
Alternatively, we can move the index `test` away from the `small` nodes with
an `exclude` rule:
The `include`, `exclude` and `require` values can have generic simple
matching wildcards, for example, `value1*`. Additionally, special attribute
names called `_ip`, `_name`, `_id` and `_host` can be used to match by node
ip address, name, id or host name, respectively.
[source,json]
------------------------
PUT test/_settings
{
"index.routing.allocation.exclude.size": "small"
}
------------------------
// AUTOSENSE
Obviously a node can have several attributes associated with it, and
both the attribute name and value are controlled in the setting. For
example, here is a sample of several node configurations:
Multiple rules can be specified, in which case all conditions must be
satisfied. For instance, we could move the index `test` to `big` nodes in
`rack1` with the following:
[source,js]
--------------------------------------------------
node.group1: group1_value1
node.group2: group2_value4
--------------------------------------------------
[source,json]
------------------------
PUT test/_settings
{
"index.routing.allocation.include.size": "big",
"index.routing.allocation.include.rack": "rack1"
}
------------------------
// AUTOSENSE
In the same manner, `include`, `exclude` and `require` can work against
several attributes, for example:
NOTE: If some conditions cannot be satisfied then shards will not be moved.
[source,js]
--------------------------------------------------
curl -XPUT localhost:9200/test/_settings -d '{
"index.routing.allocation.include.group1" : "xxx",
"index.routing.allocation.include.group2" : "yyy",
"index.routing.allocation.exclude.group3" : "zzz",
"index.routing.allocation.require.group4" : "aaa"
}'
--------------------------------------------------
The following settings are _dynamic_, allowing live indices to be moved from
one set of nodes to another:
The provided settings can also be updated in real time using the update
settings API, allowing to "move" indices (shards) around in realtime.
`index.routing.allocation.include.{attribute}`::
Cluster wide filtering can also be defined, and be updated in real time
using the cluster update settings API. This setting can come in handy
for things like decommissioning nodes (even if the replica count is set
to 0). Here is a sample of how to decommission a node based on `_ip`
address:
Assign the index to a node whose `{attribute}` has at least one of the
comma-separated values.
[source,js]
--------------------------------------------------
curl -XPUT localhost:9200/_cluster/settings -d '{
"transient" : {
"cluster.routing.allocation.exclude._ip" : "10.0.0.1"
}
}'
--------------------------------------------------
`index.routing.allocation.require.{attribute}`::
Assign the index to a node whose `{attribute}` has _all_ of the
comma-separated values.
`index.routing.allocation.exclude.{attribute}`::
Assign the index to a node whose `{attribute}` has _none_ of the
comma-separated values.
These special attributes are also supported:
[horizontal]
`_name`:: Match nodes by node name
`_ip`:: Match nodes by IP address (the IP address associated with the hostname)
`_host`:: Match nodes by hostname
All attribute values can be specified with wildcards, eg:
[source,json]
------------------------
PUT test/_settings
{
"index.routing.allocation.include._ip": "192.168.2.*"
}
------------------------
// AUTOSENSE
[float]
=== Total Shards Per Node
The `index.routing.allocation.total_shards_per_node` setting allows to
control how many total shards (replicas and primaries) for an index will be allocated per node.
It can be dynamically set on a live index using the update index
settings API.
The cluster-level shard allocator tries to spread the shards of a single index
across as many nodes as possible. However, depending on how many shards and
indices you have, and how big they are, it may not always be possible to spread
shards evenly.
[float]
[[disk]]
=== Disk-based Shard Allocation
The following _dynamic_ setting allows you to specify a hard limit on the total
number of shards from a single index allowed per node:
Disk-based shard allocation is enabled from version 1.3.0 onward.
`index.routing.allocation.total_shards_per_node`::
Elasticsearch can be configured to prevent shard
allocation on nodes depending on disk usage for the node. This
functionality is enabled by default, and can be changed either in the
configuration file, or dynamically using:
The maximum number of shards (replicas and primaries) that will be
allocated to a single node. Defaults to unbounded.
[source,js]
--------------------------------------------------
curl -XPUT localhost:9200/_cluster/settings -d '{
"transient" : {
"cluster.routing.allocation.disk.threshold_enabled" : false
}
}'
--------------------------------------------------
[WARNING]
=======================================
This setting imposes a hard limit which can result in some shards not
being allocated.
Once enabled, Elasticsearch uses two watermarks to decide whether
shards should be allocated or can remain on the node.
Use with caution.
=======================================
`cluster.routing.allocation.disk.watermark.low` controls the low
watermark for disk usage. It defaults to 85%, meaning ES will not
allocate new shards to nodes once they have more than 85% disk
used. It can also be set to an absolute byte value (like 500mb) to
prevent ES from allocating shards if less than the configured amount
of space is available.
`cluster.routing.allocation.disk.watermark.high` controls the high
watermark. It defaults to 90%, meaning ES will attempt to relocate
shards to another node if the node disk usage rises above 90%. It can
also be set to an absolute byte value (similar to the low watermark)
to relocate shards once less than the configured amount of space is
available on the node.
NOTE: Percentage values refer to used disk space, while byte values refer to
free disk space. This can be confusing, since it flips the meaning of
high and low. For example, it makes sense to set the low watermark to 10gb
and the high watermark to 5gb, but not the other way around.
Both watermark settings can be changed dynamically using the cluster
settings API. By default, Elasticsearch will retrieve information
about the disk usage of the nodes every 30 seconds. This can also be
changed by setting the `cluster.info.update.interval` setting.
An example of updating the low watermark to no more than 80% of the disk size, a
high watermark of at least 50 gigabytes free, and updating the information about
the cluster every minute:
[source,js]
--------------------------------------------------
curl -XPUT localhost:9200/_cluster/settings -d '{
"transient" : {
"cluster.routing.allocation.disk.watermark.low" : "80%",
"cluster.routing.allocation.disk.watermark.high" : "50gb",
"cluster.info.update.interval" : "1m"
}
}'
--------------------------------------------------
By default, Elasticsearch will take into account shards that are currently being
relocated to the target node when computing a node's disk usage. This can be
changed by setting the `cluster.routing.allocation.disk.include_relocations`
setting to `false` (defaults to `true`). Taking relocating shards' sizes into
account may, however, mean that the disk usage for a node is incorrectly
estimated on the high side, since the relocation could be 90% complete and a
recently retrieved disk usage would include the total size of the relocating
shard as well as the space already used by the running relocation.

View File

@ -1,18 +1,12 @@
[[index-modules-analysis]]
== Analysis
The index analysis module acts as a configurable registry of Analyzers
that can be used in order to break down indexed (analyzed) fields when a
document is indexed as well as to process query strings. It maps to the Lucene
`Analyzer`.
The index analysis module acts as a configurable registry of _analyzers_
that can be used in order to convert a string field into individual terms
which are:
Analyzers are (generally) composed of a single `Tokenizer` and zero or
more `TokenFilters`. A set of `CharFilters` can be associated with an
analyzer to process the characters prior to other analysis steps. The
analysis module allows one to register `TokenFilters`, `Tokenizers` and
`Analyzers` under logical names that can then be referenced either in
mapping definitions or in certain APIs. The Analysis module
automatically registers (*if not explicitly defined*) built in
analyzers, token filters, and tokenizers.
* added to the inverted index in order to make the document searchable
* used by high level queries such as the <<query-dsl-match-query,`match` query>>
to generate search terms.
See <<analysis>> for configuration details.
See <<analysis>> for configuration details.

View File

@ -1,33 +0,0 @@
[[index-modules-cache]]
== Cache
There are different inner caching modules associated with an index. They
include `filter` and others.
[float]
[[filter]]
=== Filter Cache
The filter cache is responsible for caching the results of filters (used
in the query). The default implementation of a filter cache (and the one
recommended to use in almost all cases) is the `node` filter cache type.
[float]
[[node-filter]]
==== Node Filter Cache
The `node` filter cache may be configured to use either a percentage of
the total memory allocated to the process or a specific amount of
memory. All shards present on a node share a single node cache (that's
why it's called `node`). The cache implements an LRU eviction policy:
when a cache becomes full, the least recently used data is evicted to
make way for new data.
The setting that allows one to control the memory size for the filter
cache is `indices.cache.filter.size`, which defaults to `10%`. *Note*,
this is *not* an index level setting but a node level setting (can be
configured in the node configuration).
`indices.cache.filter.size` can accept either a percentage value, like
`30%`, or an exact value, like `512mb`.

View File

@ -49,5 +49,10 @@ automatically.
The default mapping can be overridden by specifying the `_default_` type when
creating a new index.
Dynamic creation of mappings for unmapped types can be completely
disabled by setting `index.mapper.dynamic` to `false`.
[float]
=== Mapper settings
`index.mapper.dynamic` (_static_)::
Dynamic creation of mappings for unmapped types can be completely
disabled by setting `index.mapper.dynamic` to `false`.
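For example, a minimal sketch of supplying this _static_ setting at index
creation time (the index name is illustrative):

[source,json]
------------------------
PUT my_index
{
  "settings": {
    "index.mapper.dynamic": false
  }
}
------------------------
// AUTOSENSE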

View File

@ -14,6 +14,11 @@ number of segments per tier. The merge policy is able to merge
non-adjacent segments, and separates how many segments are merged at once from how many
segments are allowed per tier. It also does not over-merge (i.e., cascade merges).
[float]
[[merge-settings]]
=== Merge policy settings
All merge policy settings are _dynamic_ and can be updated on a live index.
The merge policy has the following settings:
`index.merge.policy.expunge_deletes_allowed`::
@ -80,30 +85,29 @@ possibly either increase the `max_merged_segment` or issue an optimize
call for the index (try and aim to issue it on a low traffic time).
[float]
[[scheduling]]
=== Scheduling
[[merge-scheduling]]
=== Merge scheduling
The merge scheduler (ConcurrentMergeScheduler) controls the execution of
merge operations once they are needed (according to the merge policy). Merges
run in separate threads, and when the maximum number of threads is reached,
further merges will wait until a merge thread becomes available. The merge
scheduler supports this setting:
further merges will wait until a merge thread becomes available.
The merge scheduler supports the following _dynamic_ settings:
`index.merge.scheduler.max_thread_count`::
The maximum number of threads that may be merging at once. Defaults to
`Math.max(1, Math.min(4, Runtime.getRuntime().availableProcessors() / 2))`
which works well for a good solid-state-disk (SSD). If your index is on
spinning platter drives instead, decrease this to 1.
The maximum number of threads that may be merging at once. Defaults to
`Math.max(1, Math.min(4, Runtime.getRuntime().availableProcessors() / 2))`
which works well for a good solid-state-disk (SSD). If your index is on
spinning platter drives instead, decrease this to 1.
`index.merge.scheduler.auto_throttle`::
If this is true (the default), then the merge scheduler will
rate-limit IO (writes) for merges to an adaptive value depending on
how many merges are requested over time. An application with a low
indexing rate that unluckily suddenly requires a large merge will see
that merge aggressively throttled, while an application doing heavy
indexing will see the throttle move higher to allow merges to keep up
with ongoing indexing. This is a dynamic setting (you can <<indices-update-settings,change it
at any time on a running index>>).
If this is true (the default), then the merge scheduler will rate-limit IO
(writes) for merges to an adaptive value depending on how many merges are
requested over time. An application with a low indexing rate that
unluckily suddenly requires a large merge will see that merge aggressively
throttled, while an application doing heavy indexing will see the throttle
move higher to allow merges to keep up with ongoing indexing.

View File

@ -1,29 +1,31 @@
[[index-modules-slowlog]]
== Index Slow Log
== Slow Log
[float]
[[search-slow-log]]
=== Search Slow Log
Shard level slow search log allows to log slow search (query and fetch
executions) into a dedicated log file.
phases) into a dedicated log file.
Thresholds can be set for both the query phase of the execution, and
fetch phase, here is a sample:
[source,js]
[source,yaml]
--------------------------------------------------
#index.search.slowlog.threshold.query.warn: 10s
#index.search.slowlog.threshold.query.info: 5s
#index.search.slowlog.threshold.query.debug: 2s
#index.search.slowlog.threshold.query.trace: 500ms
index.search.slowlog.threshold.query.warn: 10s
index.search.slowlog.threshold.query.info: 5s
index.search.slowlog.threshold.query.debug: 2s
index.search.slowlog.threshold.query.trace: 500ms
#index.search.slowlog.threshold.fetch.warn: 1s
#index.search.slowlog.threshold.fetch.info: 800ms
#index.search.slowlog.threshold.fetch.debug: 500ms
#index.search.slowlog.threshold.fetch.trace: 200ms
index.search.slowlog.threshold.fetch.warn: 1s
index.search.slowlog.threshold.fetch.info: 800ms
index.search.slowlog.threshold.fetch.debug: 500ms
index.search.slowlog.threshold.fetch.trace: 200ms
--------------------------------------------------
All of the above settings are _dynamic_ and can be set per-index.
By default, none are enabled (set to `-1`). Levels (`warn`, `info`,
`debug`, `trace`) allow to control under which logging level the log
will be logged. Not all are required to be configured (for example, only
@ -37,14 +39,10 @@ execute. One of the benefits of shard level logging is the association
of the actual execution with the specific machine, compared with request
level logging.
All settings are index level settings (and each index can have different
values for it), and can be changed in runtime using the index update
settings API.
The logging file is configured by default using the following
configuration (found in `logging.yml`):
[source,js]
[source,yaml]
--------------------------------------------------
index_search_slow_log_file:
type: dailyRollingFile
@ -64,18 +62,20 @@ log. The log file ends with `_index_indexing_slowlog.log`. The log and
the thresholds are configured in the elasticsearch.yml file in the same
way as the search slowlog. Index slowlog sample:
[source,js]
[source,yaml]
--------------------------------------------------
#index.indexing.slowlog.threshold.index.warn: 10s
#index.indexing.slowlog.threshold.index.info: 5s
#index.indexing.slowlog.threshold.index.debug: 2s
#index.indexing.slowlog.threshold.index.trace: 500ms
index.indexing.slowlog.threshold.index.warn: 10s
index.indexing.slowlog.threshold.index.info: 5s
index.indexing.slowlog.threshold.index.debug: 2s
index.indexing.slowlog.threshold.index.trace: 500ms
--------------------------------------------------
All of the above settings are _dynamic_ and can be set per-index.
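For example, a sketch of raising one threshold on a live index (the index name
is illustrative):

[source,json]
------------------------
PUT my_index/_settings
{
  "index.indexing.slowlog.threshold.index.warn": "10s"
}
------------------------
// AUTOSENSE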
The index slow log file is configured by default in the `logging.yml`
file:
[source,js]
[source,yaml]
--------------------------------------------------
index_indexing_slow_log_file:
type: dailyRollingFile

View File

@ -1,34 +1,16 @@
[[index-modules-store]]
== Store
The store module allows you to control how index data is stored.
The index can either be stored in-memory (no persistence) or on-disk
(the default). In-memory indices provide better performance at the cost
of limiting the index size to the amount of available physical memory.
When using a local gateway (the default), file system storage with *no*
in memory storage is required to maintain index consistency. This is
required since the local gateway constructs its state from the local
index state of each node.
Another important aspect of memory based storage is the fact that
Elasticsearch supports storing the index in memory *outside of the JVM
heap space* using the "Memory" (see below) storage type. It translates
to the fact that there is no need for extra large JVM heaps (with their
own consequences) for storing the index in memory.
experimental[All of the settings exposed in the `store` module are expert only and may be removed in the future]
The store module allows you to control how index data is stored and accessed on disk.
[float]
[[file-system]]
=== File system storage types
File system based storage is the default storage used. There are
different implementations or _storage types_. The best one for the
operating environment will be automatically chosen: `mmapfs` on
Windows 64bit, `simplefs` on Windows 32bit, and `default`
(hybrid `niofs` and `mmapfs`) for the rest.
There are different file system implementations or _storage types_. The best
one for the operating environment will be automatically chosen: `mmapfs` on
Windows 64bit, `simplefs` on Windows 32bit, and `default` (hybrid `niofs` and
`mmapfs`) for the rest.
This can be overridden for all indices by adding this to the
`config/elasticsearch.yml` file:
@ -38,57 +20,53 @@ This can be overridden for all indices by adding this to the
index.store.type: niofs
---------------------------------
It can also be set on a per-index basis at index creation time:
It is a _static_ setting that can be set on a per-index basis at index
creation time:
[source,json]
---------------------------------
curl -XPUT localhost:9200/my_index -d '{
"settings": {
"index.store.type": "niofs"
}
}';
PUT /my_index
{
"settings": {
"index.store.type": "niofs"
}
}
---------------------------------
experimental[This is an expert-only setting and may be removed in the future]
The following sections lists all the different storage types supported.
[float]
[[simplefs]]
==== Simple FS
[[simplefs]]`simplefs`::
The `simplefs` type is a straightforward implementation of file system
The Simple FS type is a straightforward implementation of file system
storage (maps to Lucene `SimpleFsDirectory`) using a random access file.
This implementation has poor concurrent performance (multiple threads
will bottleneck). It is usually better to use the `niofs` when you need
index persistence.
[float]
[[niofs]]
==== NIO FS
[[niofs]]`niofs`::
The `niofs` type stores the shard index on the file system (maps to
The NIO FS type stores the shard index on the file system (maps to
Lucene `NIOFSDirectory`) using NIO. It allows multiple threads to read
from the same file concurrently. It is not recommended on Windows
because of a bug in the SUN Java implementation.
[[mmapfs]]
[float]
==== MMap FS
[[mmapfs]]`mmapfs`::
The `mmapfs` type stores the shard index on the file system (maps to
The MMap FS type stores the shard index on the file system (maps to
Lucene `MMapDirectory`) by mapping a file into memory (mmap). Memory
mapping uses up a portion of the virtual memory address space in your
process equal to the size of the file being mapped. Before using this
class, be sure your have plenty of virtual address space.
See <<vm-max-map-count>>
class, be sure you have allowed plenty of
<<vm-max-map-count,virtual address space>>.
[[default_fs]]
[float]
==== Hybrid MMap / NIO FS
[[default_fs]]`default_fs`::
The `default` type stores the shard index on the file system depending on
the file type by mapping a file into memory (mmap) or using Java NIO. Currently
only the Lucene term dictionary and doc values files are memory mapped to reduce
the impact on the operating system. All other files are opened using Lucene `NIOFSDirectory`.
Address space settings (<<vm-max-map-count>>) might also apply if your term
The `default` type is a hybrid of NIO FS and MMapFS, which chooses the best
file system for each type of file. Currently only the Lucene term dictionary
and doc values files are memory mapped to reduce the impact on the operating
system. All other files are opened using Lucene `NIOFSDirectory`. Address
space settings (<<vm-max-map-count>>) might also apply if your term
dictionaries are large.

View File

@ -43,7 +43,7 @@ specified as well in the URI. Those stats can be any of:
`fielddata`:: Fielddata statistics.
`flush`:: Flush statistics.
`merge`:: Merge statistics.
`query_cache`:: <<index-modules-shard-query-cache,Shard query cache>> statistics.
`query_cache`:: <<shard-query-cache,Shard query cache>> statistics.
`refresh`:: Refresh statistics.
`suggest`:: Suggest statistics.
`warmer`:: Warmer statistics.
@ -80,7 +80,7 @@ curl 'localhost:9200/_stats/search?groups=group1,group2
--------------------------------------------------
The stats returned are aggregated on the index level, with
`primaries` and `total` aggregations, where `primaries` are the values for only the
`primaries` and `total` aggregations, where `primaries` are the values for only the
primary shards, and `total` are the accumulated values for both primary and replica shards.
In order to get back shard level stats, set the `level` parameter to `shards`.

View File

@ -29,130 +29,8 @@ curl -XPUT 'localhost:9200/my_index/_settings' -d '
}'
--------------------------------------------------
[WARNING]
========================
When changing the number of replicas the index needs to be open. Changing
the number of replicas on a closed index might prevent the index from being opened correctly again.
========================
Below is the list of settings that can be changed using the update
settings API:
`index.number_of_replicas`::
The number of replicas each shard has.
`index.auto_expand_replicas` (string)::
Set to a dash delimited lower and upper bound (e.g. `0-5`)
or one may use `all` as the upper bound (e.g. `0-all`), or `false` to disable it.
`index.blocks.read_only`::
Set to `true` to have the index read only, `false` to allow writes
and metadata changes.
`index.blocks.read`::
Set to `true` to disable read operations against the index.
`index.blocks.write`::
Set to `true` to disable write operations against the index.
`index.blocks.metadata`::
Set to `true` to disable metadata operations against the index.
`index.refresh_interval`::
The async refresh interval of a shard.
`index.translog.flush_threshold_ops`::
When to flush based on operations.
`index.translog.flush_threshold_size`::
When to flush based on translog (bytes) size.
`index.translog.flush_threshold_period`::
When to flush based on a period of not flushing.
`index.translog.disable_flush`::
Disables flushing. Note, this should only be set for a short
interval and then re-enabled.
`index.cache.filter.max_size`::
The maximum size of filter cache (per segment in shard).
Set to `-1` to disable.
`index.cache.filter.expire`::
experimental[] The expire after access time for filter cache.
Set to `-1` to disable.
`index.gateway.snapshot_interval`::
experimental[] The gateway snapshot interval (only applies to shared
gateways). Defaults to 10s.
<<index-modules-merge,merge policy>>::
All the settings for the merge policy currently configured.
A different merge policy can't be set.
`index.merge.scheduler.*`::
experimental[] All the settings for the merge scheduler.
`index.routing.allocation.include.*`::
A node matching any rule will be allowed to host shards from the index.
`index.routing.allocation.exclude.*`::
A node matching any rule will NOT be allowed to host shards from the index.
`index.routing.allocation.require.*`::
Only nodes matching all rules will be allowed to host shards from the index.
`index.routing.allocation.disable_allocation`::
Disable allocation. Defaults to `false`. Deprecated in favour of `index.routing.allocation.enable`.
`index.routing.allocation.disable_new_allocation`::
Disable new allocation. Defaults to `false`. Deprecated in favour of `index.routing.allocation.enable`.
`index.routing.allocation.disable_replica_allocation`::
Disable replica allocation. Defaults to `false`. Deprecated in favour of `index.routing.allocation.enable`.
`index.routing.allocation.enable`::
Enables shard allocation for a specific index. It can be set to:
* `all` (default) - Allows shard allocation for all shards.
* `primaries` - Allows shard allocation only for primary shards.
* `new_primaries` - Allows shard allocation only for primary shards for new indices.
* `none` - No shard allocation is allowed.
`index.routing.rebalance.enable`::
Enables shard rebalancing for a specific index. It can be set to:
* `all` (default) - Allows shard rebalancing for all shards.
* `primaries` - Allows shard rebalancing only for primary shards.
* `replicas` - Allows shard rebalancing only for replica shards.
* `none` - No shard rebalancing is allowed.
`index.routing.allocation.total_shards_per_node`::
Controls the total number of shards (replicas and primaries) allowed to be allocated on a single node. Defaults to unbounded (`-1`).
`index.recovery.initial_shards`::
When using local gateway a particular shard is recovered only if there can be allocated quorum shards in the cluster. It can be set to:
* `quorum` (default)
* `quorum-1` (or `half`)
* `full`
* `full-1`.
* Number values are also supported, e.g. `1`.
`index.gc_deletes`::
experimental[]
`index.ttl.disable_purge`::
experimental[] Temporarily disables the purge of expired docs.
<<index-modules-store,store level throttling>>::
All the settings for the store level throttling policy currently configured.
`index.translog.fs.type`::
experimental[] Either `simple` or `buffered` (default).
<<index-modules-slowlog>>::
All the settings for slow log.
`index.warmer.enabled`::
See <<indices-warmers>>. Defaults to `true`.
The list of per-index settings which can be updated dynamically on live
indices can be found in <<index-modules>>.
[float]
[[bulk]]

View File

@ -56,10 +56,10 @@ value as a numeric type).
The `index.mapping.coerce` global setting can be set on the
index level to coerce numeric content globally across all
mapping types (The default setting is true and coercions attempted are
mapping types (The default setting is true and coercions attempted are
to convert strings with numbers into numeric types and also numeric values
with fractions to any integer/short/long values minus the fraction part).
When the permitted conversions fail in their attempts, the value is considered
When the permitted conversions fail in their attempts, the value is considered
malformed and the ignore_malformed setting dictates what will happen next.
--
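As a sketch (assuming the global setting is supplied as an index setting at
creation time; the index name is illustrative), coercion could be disabled for
a whole index like this:

[source,json]
------------------------
PUT my_index
{
  "settings": {
    "index.mapping.coerce": false
  }
}
------------------------
// AUTOSENSE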
@ -69,6 +69,8 @@ include::mapping/types.asciidoc[]
include::mapping/date-format.asciidoc[]
include::mapping/fielddata_formats.asciidoc[]
include::mapping/dynamic-mapping.asciidoc[]
include::mapping/meta.asciidoc[]

View File

@ -200,9 +200,14 @@ year.
year, and two digit day of month.
|`epoch_second`|A formatter for the number of seconds since the epoch.
Note, that this timestamp allows a max length of 10 chars, so dates
older than 1653 and 2286 are not supported. You should use a different
date formatter in that case.
|`epoch_millis`|A formatter for the number of milliseconds since
the epoch.
|`epoch_millis`|A formatter for the number of milliseconds since the epoch.
Note that this timestamp allows a max length of 13 chars, so dates
before 1653 or after 2286 are not supported. You should use a different
date formatter in that case.
|=======================================================================
[float]

View File

@ -1,87 +1,5 @@
[[index-modules-fielddata]]
== Field data
The field data cache is used mainly when sorting on or computing aggregations
on a field. It loads all the field values to memory in order to provide fast
document based access to those values. The field data cache can be
expensive to build for a field, so it's recommended to have enough memory
to allocate it, and to keep it loaded.
The amount of memory used for the field
data cache can be controlled using `indices.fielddata.cache.size`. Note:
reloading the field data which does not fit into your cache will be expensive
and perform poorly.
[cols="<,<",options="header",]
|=======================================================================
|Setting |Description
|`indices.fielddata.cache.size` |The max size of the field data cache,
eg `30%` of node heap space, or an absolute value, eg `12GB`. Defaults
to unbounded.
|`indices.fielddata.cache.expire` |experimental[] A time based setting that expires
field data after a certain time of inactivity. Defaults to `-1`. For
example, can be set to `5m` for a 5 minute expiry.
|=======================================================================
[float]
[[circuit-breaker]]
=== Circuit Breaker
Elasticsearch contains multiple circuit breakers used to prevent operations from
causing an OutOfMemoryError. Each breaker specifies a limit for how much memory
it can use. Additionally, there is a parent-level breaker that specifies the
total amount of memory that can be used across all breakers.
The parent-level breaker can be configured with the following setting:
`indices.breaker.total.limit`::
Starting limit for overall parent breaker, defaults to 70% of JVM heap
All circuit breaker settings can be changed dynamically using the cluster update
settings API.
[float]
[[fielddata-circuit-breaker]]
==== Field data circuit breaker
The field data circuit breaker allows Elasticsearch to estimate the amount of
memory a field will require to be loaded into memory. It can then prevent the
field data loading by raising an exception. By default the limit is configured
to 60% of the maximum JVM heap. It can be configured with the following
parameters:
`indices.breaker.fielddata.limit`::
Limit for fielddata breaker, defaults to 60% of JVM heap
`indices.breaker.fielddata.overhead`::
A constant that all field data estimations are multiplied with to determine a
final estimation. Defaults to 1.03
[float]
[[request-circuit-breaker]]
==== Request circuit breaker
The request circuit breaker allows Elasticsearch to prevent per-request data
structures (for example, memory used for calculating aggregations during a
request) from exceeding a certain amount of memory.
`indices.breaker.request.limit`::
Limit for request breaker, defaults to 40% of JVM heap
`indices.breaker.request.overhead`::
A constant that all request estimations are multiplied with to determine a
final estimation. Defaults to 1
[float]
[[fielddata-monitoring]]
=== Monitoring field data
You can monitor memory usage for field data as well as the field data circuit
breaker using
<<cluster-nodes-stats,Nodes Stats API>>
[[fielddata-formats]]
== Field data formats
== Fielddata formats
The field data format controls how field data should be stored.
@ -111,7 +29,7 @@ It is possible to change the field data format (and the field data settings
in general) on a live index by using the update mapping API.
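A minimal sketch of such an update (index, type, and field names are
illustrative):

[source,js]
--------------------------------------------------
PUT my_index/_mapping/my_type
{
  "properties": {
    "my_field": {
      "type": "string",
      "index": "not_analyzed",
      "fielddata": {
        "format": "doc_values"
      }
    }
  }
}
--------------------------------------------------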
[float]
==== String field data types
=== String field data types
`paged_bytes` (default on analyzed string fields)::
Stores unique terms sequentially in a large buffer and maps documents to
@ -123,7 +41,7 @@ in general) on a live index by using the update mapping API.
`not_analyzed`).
[float]
==== Numeric field data types
=== Numeric field data types
`array`::
Stores field values in memory using arrays.
@ -132,7 +50,7 @@ in general) on a live index by using the update mapping API.
Computes and stores field data data-structures on disk at indexing time.
[float]
==== Geo point field data types
=== Geo point field data types
`array`::
Stores latitudes and longitudes in arrays.
@ -142,7 +60,7 @@ in general) on a live index by using the update mapping API.
[float]
[[global-ordinals]]
==== Global ordinals
=== Global ordinals
Global ordinals is a data structure on top of field data that maintains an
incremental numbering for all the terms in field data in lexicographic order.

View File

@ -2,8 +2,7 @@
=== `_timestamp`
The `_timestamp` field allows to automatically index the timestamp of a
document. It can be provided externally via the index request or in the
`_source`. If it is not provided externally it will be automatically set
document. If it is not provided it will be automatically set
to a <<mapping-timestamp-field-default,default date>>.
[float]
@ -21,44 +20,6 @@ should be defined:
}
--------------------------------------------------
[float]
==== store / index
By default the `_timestamp` field has `store` set to `true` and `index`
set to `not_analyzed`. It can be queried as a standard date field.
[float]
==== path
The `_timestamp` value can be provided as an external value when
indexing. But, it can also be automatically extracted from the document
to index based on a `path`. For example, having the following mapping:
[source,js]
--------------------------------------------------
{
"tweet" : {
"_timestamp" : {
"enabled" : true,
"path" : "post_date"
}
}
}
--------------------------------------------------
Will cause `2009-11-15T14:12:12` to be used as the timestamp value for:
[source,js]
--------------------------------------------------
{
"message" : "You know, for Search",
"post_date" : "2009-11-15T14:12:12"
}
--------------------------------------------------
Note, using `path` without explicit timestamp value provided requires an
additional (though quite fast) parsing phase.
[float]
[[mapping-timestamp-field-format]]
==== format

View File

@ -200,7 +200,7 @@ PUT my_index/_mapping/my_type
Please however note that norms won't be removed instantly, but will be removed
as old segments are merged into new segments as you continue indexing new documents.
Any score computation on a field that has had
Any score computation on a field that has had
norms removed might return inconsistent results since some documents won't have
norms anymore while other documents might still have norms.
@ -484,7 +484,7 @@ binary type:
It is possible to control which field values are loaded into memory,
which is particularly useful for aggregations on string fields, using
fielddata filters, which are explained in detail in the
<<index-modules-fielddata,Fielddata>> section.
<<modules-fielddata,Fielddata>> section.
Fielddata filters can exclude terms which do not match a regex, or which
don't fall between a `min` and `max` frequency range:

View File

@ -295,12 +295,23 @@ to provide special features. They now have limited configuration options.
* `_boost` has been removed.
* `_field_names` configuration is limited to disabling the field.
* `_size` configuration is limited to enabling the field.
* `_timestamp` configuration is limited to enabling the field, setting format and default value
==== Meta fields in documents
Meta fields can no longer be specified within a document. They should be specified
via the API. For example, instead of adding a field `_parent` within a document,
use the `parent` url parameter when indexing that document.
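For example, a sketch with curl (index, type, and document IDs are
illustrative):

[source,js]
--------------------------------------------------
curl -XPUT 'localhost:9200/my_index/my_child_type/1?parent=42' -d '{
  "message" : "a child document"
}'
--------------------------------------------------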
==== Date format does not support unix timestamps by default
In earlier versions of elasticsearch, every timestamp was first parsed as
a unix timestamp. This means that, even when specifying a date format like
`dateOptionalTime`, one could supply unix timestamps instead of an ISO8601-formatted
date.
This is not supported anymore. If you want to store unix timestamps, you need to specify
the appropriate formats in the mapping, namely `epoch_second` or `epoch_millis`.
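For example, a sketch of a mapping that accepts millisecond timestamps (index,
type, and field names are illustrative):

[source,js]
--------------------------------------------------
PUT my_index
{
  "mappings": {
    "my_type": {
      "properties": {
        "created": {
          "type": "date",
          "format": "epoch_millis"
        }
      }
    }
  }
}
--------------------------------------------------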
==== Source field limitations
The `_source` field could previously be disabled dynamically. Since this field
is a critical piece of many features like the Update API, it is no longer

View File

@ -1,6 +1,75 @@
[[modules]]
= Modules
[partintro]
--
This section contains modules responsible for various aspects of the functionality in Elasticsearch. Each module has settings which may be:
_static_::
These settings must be set at the node level, either in the
`elasticsearch.yml` file, or as an environment variable or on the command line
when starting a node. They must be set on every relevant node in the cluster.
_dynamic_::
These settings can be dynamically updated on a live cluster with the
<<cluster-update-settings,cluster-update-settings>> API.
The modules in this section are:
<<modules-cluster,Cluster-level routing and shard allocation>>::
Settings to control where, when, and how shards are allocated to nodes.
<<modules-discovery,Discovery>>::
How nodes discover each other to form a cluster.
<<modules-gateway,Gateway>>::
How many nodes need to join the cluster before recovery can start.
<<modules-http,HTTP>>::
Settings to control the HTTP REST interface.
<<modules-indices,Indices>>::
Global index-related settings.
<<modules-network,Network>>::
Controls default network settings.
<<modules-node,Node client>>::
A Java node client joins the cluster, but doesn't hold data or act as a master node.
<<modules-plugins,Plugins>>::
Using plugins to extend Elasticsearch.
<<modules-scripting,Scripting>>::
Custom scripting available in Lucene Expressions, Groovy, Python, and
JavaScript.
<<modules-snapshots,Snapshot/Restore>>::
Backup your data with snapshot/restore.
<<modules-threadpool,Thread pools>>::
Information about the dedicated thread pools used in Elasticsearch.
<<modules-transport,Transport>>::
Configure the transport networking layer, used internally by Elasticsearch
to communicate between nodes.
--
include::modules/cluster.asciidoc[]
include::modules/discovery.asciidoc[]
@ -15,19 +84,20 @@ include::modules/network.asciidoc[]
include::modules/node.asciidoc[]
include::modules/tribe.asciidoc[]
include::modules/plugins.asciidoc[]
include::modules/scripting.asciidoc[]
include::modules/advanced-scripting.asciidoc[]
include::modules/snapshots.asciidoc[]
include::modules/threadpool.asciidoc[]
include::modules/transport.asciidoc[]
include::modules/snapshots.asciidoc[]
include::modules/tribe.asciidoc[]

View File

@ -1,5 +1,5 @@
[[modules-advanced-scripting]]
== Text scoring in scripts
=== Text scoring in scripts
Text features, such as term or document frequency for a specific term, can be accessed in scripts (see <<modules-scripting,scripting documentation>>) with the `_index` variable. This can be useful if, for example, you want to implement your own scoring model using a script inside a <<query-dsl-function-score-query,function score query>>.
@ -7,7 +7,7 @@ Statistics over the document collection are computed *per shard*, not per
index.
[float]
=== Nomenclature:
==== Nomenclature:
[horizontal]
@ -33,7 +33,7 @@ depending on the shard the current document resides in.
[float]
=== Shard statistics:
==== Shard statistics:
`_index.numDocs()`::
@ -49,7 +49,7 @@ depending on the shard the current document resides in.
[float]
=== Field statistics:
==== Field statistics:
Field statistics can be accessed with a subscript operator like this:
`_index['FIELD']`.
@ -74,7 +74,7 @@ depending on the shard the current document resides in.
The number of terms in a field cannot be accessed using the `_index` variable. See <<mapping-core-types, word count mapping type>> on how to do that.
[float]
=== Term statistics:
==== Term statistics:
Term statistics for a field can be accessed with a subscript operator like
this: `_index['FIELD']['TERM']`. This will never return null, even if term or field does not exist.
@ -101,7 +101,7 @@ affect is your set the `index_options` to `docs` (see <<mapping-core-types, mapp
[float]
=== Term positions, offsets and payloads:
==== Term positions, offsets and payloads:
If you need information on the positions of terms in a field, call
`_index['FIELD'].get('TERM', flag)` where flag can be
@ -174,7 +174,7 @@ return score;
[float]
=== Term vectors:
==== Term vectors:
The `_index` variable can only be used to gather statistics for single terms. If you want to use information on all terms in a field, you must store the term vectors (set `term_vector` in the mapping as described in the <<mapping-core-types,mapping documentation>>). To access them, call
`_index.termVectors()` to get a

View File

@ -1,253 +1,36 @@
[[modules-cluster]]
== Cluster
One of the main roles of the master is to decide which shards to allocate to
which nodes, and when to move shards between nodes in order to rebalance the
cluster.
There are a number of settings available to control the shard allocation process:
* <<shards-allocation>> lists the settings to control the allocation and
rebalancing operations.
* <<disk-allocator>> explains how Elasticsearch takes available disk space
into account, and the related settings.
* <<allocation-awareness>> and <<forced-awareness>> control how shards can
be distributed across different racks or availability zones.
* <<allocation-filtering>> allows certain nodes or groups of nodes to be excluded
from allocation so that they can be decommissioned.
Besides these, there are a few other <<misc-cluster,miscellaneous cluster-level settings>>.
All of the settings in this section are _dynamic_ settings which can be
updated on a live cluster with the
<<cluster-update-settings,cluster-update-settings>> API.
include::cluster/shards_allocation.asciidoc[]
include::cluster/disk_allocator.asciidoc[]
include::cluster/allocation_awareness.asciidoc[]
include::cluster/allocation_filtering.asciidoc[]
[float]
[[shards-allocation]]
=== Shards Allocation
Shards allocation is the process of allocating shards to nodes. This can
happen during initial recovery, replica allocation, rebalancing, or
handling nodes being added or removed.
The following settings may be used:
`cluster.routing.allocation.allow_rebalance`::
Controls when rebalancing will happen based on the total
state of all the indices shards in the cluster. `always`,
`indices_primaries_active`, and `indices_all_active` are allowed,
defaulting to `indices_all_active` to reduce chatter during
initial recovery.
`cluster.routing.allocation.cluster_concurrent_rebalance`::
Controls how many concurrent shard rebalances are
allowed cluster wide. Defaults to `2`.
`cluster.routing.allocation.node_initial_primaries_recoveries`::
Controls the number of initial recoveries
of primaries that are allowed per node. Since most times the local
gateway is used, those should be fast and we can handle more of
those per node without creating load. Defaults to `4`.
`cluster.routing.allocation.node_concurrent_recoveries`::
How many concurrent recoveries are allowed to happen on a node.
Defaults to `2`.
`cluster.routing.allocation.enable`::
Controls shard allocation for all indices, by allowing specific
kinds of shard to be allocated.
+
--
Can be set to:
* `all` - (default) Allows shard allocation for all kinds of shards.
* `primaries` - Allows shard allocation only for primary shards.
* `new_primaries` - Allows shard allocation only for primary shards for new indices.
* `none` - No shard allocations of any kind are allowed for any indices.
--
`cluster.routing.rebalance.enable`::
Controls shard rebalance for all indices, by allowing specific
kinds of shard to be rebalanced.
+
--
Can be set to:
* `all` - (default) Allows shard balancing for all kinds of shards.
* `primaries` - Allows shard balancing only for primary shards.
* `replicas` - Allows shard balancing only for replica shards.
* `none` - No shard balancing of any kind is allowed for any indices.
--
`cluster.routing.allocation.same_shard.host`::
Enables a check to prevent allocation of multiple instances
of the same shard on a single host, based on host name and host address.
Defaults to `false`, meaning that no check is performed by default. This
setting only applies if multiple nodes are started on the same machine.
`indices.recovery.concurrent_streams`::
The number of streams to open (on a *node* level) to recover a
shard from a peer shard. Defaults to `3`.
`indices.recovery.concurrent_small_file_streams`::
The number of streams to open (on a *node* level) for small files (under
5mb) to recover a shard from a peer shard. Defaults to `2`.
[float]
[[allocation-awareness]]
=== Shard Allocation Awareness
Cluster allocation awareness allows you to configure shard and replica
allocation across generic attributes associated with the nodes. Let's explain
it through an example:
Assume we have several racks. When we start a node, we can configure an
attribute called `rack_id` (any attribute name works), for example, here
is a sample config:
----------------------
node.rack_id: rack_one
----------------------
The above sets an attribute called `rack_id` for the relevant node with
a value of `rack_one`. Now, we need to configure the `rack_id` attribute
as one of the awareness allocation attributes (set it on *all* (master
eligible) nodes config):
--------------------------------------------------------
cluster.routing.allocation.awareness.attributes: rack_id
--------------------------------------------------------
The above will mean that the `rack_id` attribute will be used to do
awareness-based allocation of a shard and its replicas. For example, let's
say we start 2 nodes with `node.rack_id` set to `rack_one`, and deploy a
single index with 5 shards and 1 replica. The index will be fully
deployed on the current nodes (5 shards and 1 replica each, total of 10
shards).
Now, if we start two more nodes, with `node.rack_id` set to `rack_two`,
shards will relocate to even the number of shards across the nodes, but,
a shard and its replica will not be allocated in the same `rack_id`
value.
The awareness attributes can hold several values, for example:
-------------------------------------------------------------
cluster.routing.allocation.awareness.attributes: rack_id,zone
-------------------------------------------------------------
*NOTE*: When using awareness attributes, shards will not be allocated to
nodes that don't have values set for those attributes.
[float]
[[forced-awareness]]
=== Forced Awareness
Sometimes, we know in advance the number of values an awareness
attribute can have, and moreover, we would like to never have more
replicas than needed allocated on a specific group of nodes with the
same awareness attribute value. For that, we can force awareness on
specific attributes.
For example, let's say we have an awareness attribute called `zone`, and
we know we are going to have two zones, `zone1` and `zone2`. Here is how
we can force awareness on a node:
[source,js]
-------------------------------------------------------------------
cluster.routing.allocation.awareness.force.zone.values: zone1,zone2
cluster.routing.allocation.awareness.attributes: zone
-------------------------------------------------------------------
Now, let's say we start 2 nodes with `node.zone` set to `zone1` and
create an index with 5 shards and 1 replica. The index will be created,
but only 5 shards will be allocated (with no replicas). Only when we
start more nodes with `node.zone` set to `zone2` will the replicas be
allocated.
[float]
==== Automatic Preference When Searching / GETing
When executing a search, or doing a get, the node receiving the request
will prefer to execute the request on shards that exist on nodes that
have the same attribute values as the executing node. This only happens
when the `cluster.routing.allocation.awareness.attributes` setting has
been set to a value.
[float]
==== Realtime Settings Update
The settings can be updated using the <<cluster-update-settings,cluster update settings API>> on a live cluster.
[float]
[[allocation-filtering]]
=== Shard Allocation Filtering
Allocation of indices on nodes can be controlled based on include/exclude
filters. The filters can be set both on the index level and on the
cluster level. Let's start with an example of setting it on the cluster
level:
Let's say we have 4 nodes, each with a specific attribute called `tag`
associated with it (the name of the attribute can be any name). Each
node has a specific value associated with `tag`. Node 1 has a setting
`node.tag: value1`, Node 2 a setting of `node.tag: value2`, and so on.
We can create an index that will only deploy on nodes that have `tag`
set to `value1` and `value2` by setting
`index.routing.allocation.include.tag` to `value1,value2`. For example:
[source,js]
--------------------------------------------------
curl -XPUT localhost:9200/test/_settings -d '{
"index.routing.allocation.include.tag" : "value1,value2"
}'
--------------------------------------------------
On the other hand, we can create an index that will be deployed on all
nodes except for nodes with a `tag` of value `value3` by setting
`index.routing.allocation.exclude.tag` to `value3`. For example:
[source,js]
--------------------------------------------------
curl -XPUT localhost:9200/test/_settings -d '{
"index.routing.allocation.exclude.tag" : "value3"
}'
--------------------------------------------------
`index.routing.allocation.require.*` can be used to
specify a number of rules, all of which MUST match in order for a shard
to be allocated to a node. This is in contrast to `include` which will
include a node if ANY rule matches.
The `include`, `exclude` and `require` values can have generic simple
matching wildcards, for example, `value1*`. A special attribute name
called `_ip` can be used to match on node ip values. In addition `_host`
attribute can be used to match on either the node's hostname or its ip
address. Similarly `_name` and `_id` attributes can be used to match on
node name and node id accordingly.
Obviously a node can have several attributes associated with it, and
both the attribute name and value are controlled in the setting. For
example, here is a sample of several node configurations:
[source,js]
--------------------------------------------------
node.group1: group1_value1
node.group2: group2_value4
--------------------------------------------------
In the same manner, `include`, `exclude` and `require` can work against
several attributes, for example:
[source,js]
--------------------------------------------------
curl -XPUT localhost:9200/test/_settings -d '{
"index.routing.allocation.include.group1" : "xxx",
"index.routing.allocation.include.group2" : "yyy",
"index.routing.allocation.exclude.group3" : "zzz",
"index.routing.allocation.require.group4" : "aaa"
}'
--------------------------------------------------
The provided settings can also be updated in real time using the update
settings API, allowing you to "move" indices (shards) around in real time.
Cluster wide filtering can also be defined, and be updated in real time
using the cluster update settings API. This setting can come in handy
for things like decommissioning nodes (even if the replica count is set
to 0). Here is a sample of how to decommission a node based on `_ip`
address:
[source,js]
--------------------------------------------------
curl -XPUT localhost:9200/_cluster/settings -d '{
"transient" : {
"cluster.routing.allocation.exclude._ip" : "10.0.0.1"
}
}'
--------------------------------------------------
include::cluster/misc.asciidoc[]

View File

@ -0,0 +1,107 @@
[[allocation-awareness]]
=== Shard Allocation Awareness
When running nodes on multiple VMs on the same physical server, on multiple
racks, or across multiple awareness zones, it is more likely that two nodes on
the same physical server, in the same rack, or in the same awareness zone will
crash at the same time, rather than two unrelated nodes crashing
simultaneously.
If Elasticsearch is _aware_ of the physical configuration of your hardware, it
can ensure that the primary shard and its replica shards are spread across
different physical servers, racks, or zones, to minimise the risk of losing
all shard copies at the same time.
The shard allocation awareness settings allow you to tell Elasticsearch about
your hardware configuration.
As an example, let's assume we have several racks. When we start a node, we
can tell it which rack it is in by assigning it an arbitrary metadata
attribute called `rack_id` -- we could use any attribute name. For example:
[source,sh]
----------------------
./bin/elasticsearch --node.rack_id rack_one <1>
----------------------
<1> This setting could also be specified in the `elasticsearch.yml` config file.
Now, we need to set up _shard allocation awareness_ by telling Elasticsearch
which attributes to use. This can be configured in the `elasticsearch.yml`
file on *all* master-eligible nodes, or it can be set (and changed) with the
<<cluster-update-settings,cluster-update-settings>> API.
For our example, we'll set the value in the config file:
[source,yaml]
--------------------------------------------------------
cluster.routing.allocation.awareness.attributes: rack_id
--------------------------------------------------------
With this config in place, let's say we start two nodes with `node.rack_id`
set to `rack_one`, and we create an index with 5 primary shards and 1 replica
of each primary. All primaries and replicas are allocated across the two
nodes.
Now, if we start two more nodes with `node.rack_id` set to `rack_two`,
Elasticsearch will move shards across to the new nodes, ensuring (if possible)
that the primary and replica shards are never in the same rack.
.Prefer local shards
*********************************************
When executing search or GET requests, with shard awareness enabled,
Elasticsearch will prefer using local shards -- shards in the same awareness
group -- to execute the request. This is usually faster than crossing racks or
awareness zones.
*********************************************
Multiple awareness attributes can be specified, in which case the combination
of values from each attribute is considered to be a separate value.
[source,yaml]
-------------------------------------------------------------
cluster.routing.allocation.awareness.attributes: rack_id,zone
-------------------------------------------------------------
NOTE: When using awareness attributes, shards will not be allocated to
nodes that don't have values set for those attributes.
[float]
[[forced-awareness]]
=== Forced Awareness
Imagine that you have two awareness zones and enough hardware across the two
zones to host all of your primary and replica shards. But perhaps the
hardware in a single zone, while sufficient to host half the shards, would be
unable to host *ALL* the shards.
With ordinary awareness, if one zone lost contact with the other zone,
Elasticsearch would assign all of the missing replica shards to a single zone.
But in this example, this sudden extra load would cause the hardware in the
remaining zone to be overloaded.
Forced awareness solves this problem by *NEVER* allowing copies of the same
shard to be allocated to the same zone.
For example, let's say we have an awareness attribute called `zone`, and
we know we are going to have two zones, `zone1` and `zone2`. Here is how
we can force awareness on a node:
[source,yaml]
-------------------------------------------------------------------
cluster.routing.allocation.awareness.force.zone.values: zone1,zone2 <1>
cluster.routing.allocation.awareness.attributes: zone
-------------------------------------------------------------------
<1> We must list all possible values that the `zone` attribute can have.
Now, if we start 2 nodes with `node.zone` set to `zone1` and create an index
with 5 shards and 1 replica, the index will be created, but only the 5 primary
shards will be allocated (with no replicas). Only when we start more nodes
with `node.zone` set to `zone2` will the replicas be allocated.
The `cluster.routing.allocation.awareness.*` settings can all be updated
dynamically on a live cluster with the
<<cluster-update-settings,cluster-update-settings>> API.
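For example, a minimal sketch of changing the awareness attributes on a live
cluster (the attribute name `zone` and the `persistent` scope are illustrative):
[source,js]
--------------------------------------------------
PUT /_cluster/settings
{
    "persistent" : {
        "cluster.routing.allocation.awareness.attributes" : "zone"
    }
}
--------------------------------------------------
// AUTOSENSE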

View File

@ -0,0 +1,70 @@
[[allocation-filtering]]
=== Shard Allocation Filtering
While <<index-modules-allocation>> provides *per-index* settings to control the
allocation of shards to nodes, cluster-level shard allocation filtering allows
you to allow or disallow the allocation of shards from *any* index to
particular nodes.
The typical use case for cluster-wide shard allocation filtering is when you
want to decommission a node, and you would like to move the shards from that
node to other nodes in the cluster before shutting it down.
For instance, we could decommission a node using its IP address as follows:
[source,json]
--------------------------------------------------
PUT /_cluster/settings
{
"transient" : {
"cluster.routing.allocation.exclude._ip" : "10.0.0.1"
}
}
--------------------------------------------------
// AUTOSENSE
NOTE: Shards will only be relocated if it is possible to do so without
breaking another routing constraint, such as never allocating a primary and
replica shard to the same node.
Cluster-wide shard allocation filtering works in the same way as index-level
shard allocation filtering (see <<index-modules-allocation>> for details).
The available _dynamic_ cluster settings are as follows, where `{attribute}`
refers to an arbitrary node attribute:
`cluster.routing.allocation.include.{attribute}`::
Allocate shards to a node whose `{attribute}` has at least one of the
comma-separated values.
`cluster.routing.allocation.require.{attribute}`::
Allocate shards only to a node whose `{attribute}` has _all_ of the
comma-separated values.
`cluster.routing.allocation.exclude.{attribute}`::
Allocate shards only to a node whose `{attribute}` has _none_ of the
comma-separated values.
These special attributes are also supported:
[horizontal]
`_name`:: Match nodes by node name
`_ip`:: Match nodes by IP address (the IP address associated with the hostname)
`_host`:: Match nodes by hostname
All attribute values can be specified with wildcards, eg:
[source,json]
------------------------
PUT _cluster/settings
{
"transient": {
"cluster.routing.allocation.include._ip": "192.168.2.*"
}
}
------------------------
// AUTOSENSE

View File

@ -0,0 +1,69 @@
[[disk-allocator]]
=== Disk-based Shard Allocation
Elasticsearch factors in the available disk space on a node before deciding
whether to allocate new shards to that node or to actively relocate shards
away from that node.
Below are the settings that can be configured in the `elasticsearch.yml` config
file or updated dynamically on a live cluster with the
<<cluster-update-settings,cluster-update-settings>> API:
`cluster.routing.allocation.disk.threshold_enabled`::
Defaults to `true`. Set to `false` to disable the disk allocation decider.
`cluster.routing.allocation.disk.watermark.low`::
Controls the low watermark for disk usage. It defaults to 85%, meaning ES will
not allocate new shards to nodes once they have more than 85% disk used. It
can also be set to an absolute byte value (like 500mb) to prevent ES from
allocating shards if less than the configured amount of space is available.
`cluster.routing.allocation.disk.watermark.high`::
Controls the high watermark. It defaults to 90%, meaning ES will attempt to
relocate shards to another node if the node disk usage rises above 90%. It can
also be set to an absolute byte value (similar to the low watermark) to
relocate shards once less than the configured amount of space is available on
the node.
NOTE: Percentage values refer to used disk space, while byte values refer to
free disk space. This can be confusing, since it flips the meaning of high and
low. For example, it makes sense to set the low watermark to 10gb and the high
watermark to 5gb, but not the other way around.
`cluster.info.update.interval`::
How often Elasticsearch should check on disk usage for each node in the
cluster. Defaults to `30s`.
`cluster.routing.allocation.disk.include_relocations`::
Defaults to +true+, which means that Elasticsearch will take into account
shards that are currently being relocated to the target node when computing a
node's disk usage. Taking relocating shards' sizes into account may, however,
mean that the disk usage for a node is incorrectly estimated on the high side,
since the relocation could be 90% complete and a recently retrieved disk usage
would include the total size of the relocating shard as well as the space
already used by the running relocation.
An example of updating the low watermark to no more than 80% of the disk size, a
high watermark of at least 50 gigabytes free, and updating the information about
the cluster every minute:
[source,js]
--------------------------------------------------
PUT /_cluster/settings
{
"transient": {
"cluster.routing.allocation.disk.watermark.low": "80%",
"cluster.routing.allocation.disk.watermark.high": "50gb",
"cluster.info.update.interval": "1m"
}
}
--------------------------------------------------
// AUTOSENSE

View File

@ -0,0 +1,36 @@
[[misc-cluster]]
=== Miscellaneous cluster settings
[[cluster-read-only]]
==== Metadata
An entire cluster may be set to read-only with the following _dynamic_ setting:
`cluster.blocks.read_only`::
Make the whole cluster read only (indices do not accept write
operations), and prevent the metadata from being modified (no creating
or deleting indices).
WARNING: Don't rely on this setting to prevent changes to your cluster. Any
user with access to the <<cluster-update-settings,cluster-update-settings>>
API can make the cluster read-write again.
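As a minimal sketch, the block can be toggled dynamically:
[source,json]
-------------------------------
PUT /_cluster/settings
{
    "transient": {
        "cluster.blocks.read_only": true
    }
}
-------------------------------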
[[cluster-logger]]
==== Logger
The settings which control logging can be updated dynamically with the
`logger.` prefix. For instance, to increase the logging level of the
`indices.recovery` module to `DEBUG`, issue this request:
[source,json]
-------------------------------
PUT /_cluster/settings
{
"transient": {
"logger.indices.recovery": "DEBUG"
}
}
-------------------------------

View File

@ -0,0 +1,124 @@
[[shards-allocation]]
=== Cluster Level Shard Allocation
Shard allocation is the process of allocating shards to nodes. This can
happen during initial recovery, replica allocation, rebalancing, or
when nodes are added or removed.
[float]
=== Shard Allocation Settings
The following _dynamic_ settings may be used to control shard allocation and recovery:
`cluster.routing.allocation.enable`::
+
--
Enable or disable allocation for specific kinds of shards:
* `all` - (default) Allows shard allocation for all kinds of shards.
* `primaries` - Allows shard allocation only for primary shards.
* `new_primaries` - Allows shard allocation only for primary shards for new indices.
* `none` - No shard allocations of any kind are allowed for any indices.
This setting does not affect the recovery of local primary shards when
restarting a node. A restarted node that has a copy of an unassigned primary
shard will recover that primary immediately, assuming that the
<<index.recovery.initial_shards,`index.recovery.initial_shards`>> setting is
satisfied.
--
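For example, a minimal sketch (the `transient` scope is illustrative) of
temporarily disabling all allocation, as is common before node maintenance:
[source,js]
--------------------------------------------------
PUT /_cluster/settings
{
    "transient" : {
        "cluster.routing.allocation.enable" : "none"
    }
}
--------------------------------------------------
// AUTOSENSE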
`cluster.routing.allocation.node_concurrent_recoveries`::
How many concurrent shard recoveries are allowed to happen on a node.
Defaults to `2`.
`cluster.routing.allocation.node_initial_primaries_recoveries`::
While the recovery of replicas happens over the network, the recovery of
an unassigned primary after node restart uses data from the local disk.
These should be fast so more initial primary recoveries can happen in
parallel on the same node. Defaults to `4`.
`cluster.routing.allocation.same_shard.host`::
Enables a check to prevent allocation of multiple instances of
the same shard on a single host, based on host name and host address.
Defaults to `false`, meaning that no check is performed by default. This
setting only applies if multiple nodes are started on the same machine.
`indices.recovery.concurrent_streams`::
The number of network streams to open per node to recover a shard from
a peer shard. Defaults to `3`.
`indices.recovery.concurrent_small_file_streams`::
The number of streams to open per node for small files (under 5mb) to
recover a shard from a peer shard. Defaults to `2`.
[float]
=== Shard Rebalancing Settings
The following _dynamic_ settings may be used to control the rebalancing of
shards across the cluster:
`cluster.routing.rebalance.enable`::
+
--
Enable or disable rebalancing for specific kinds of shards:
* `all` - (default) Allows shard balancing for all kinds of shards.
* `primaries` - Allows shard balancing only for primary shards.
* `replicas` - Allows shard balancing only for replica shards.
* `none` - No shard balancing of any kind are allowed for any indices.
--
`cluster.routing.allocation.allow_rebalance`::
+
--
Specify when shard rebalancing is allowed:
* `always` - (default) Always allow rebalancing.
* `indices_primaries_active` - Only when all primaries in the cluster are allocated.
* `indices_all_active` - Only when all shards (primaries and replicas) in the cluster are allocated.
--
`cluster.routing.allocation.cluster_concurrent_rebalance`::
Controls how many concurrent shard rebalances are
allowed cluster wide. Defaults to `2`.
[float]
=== Shard Balancing Heuristics
The following settings are used together to determine where to place each
shard. The cluster is balanced when no allowed action can bring the weights
of each node closer together by more than the `balance.threshold`.
`cluster.routing.allocation.balance.shard`::
Defines the weight factor for shards allocated on a node
(float). Defaults to `0.45f`. Raising this raises the tendency to
equalize the number of shards across all nodes in the cluster.
`cluster.routing.allocation.balance.index`::
Defines a factor to the number of shards per index allocated
on a specific node (float). Defaults to `0.55f`. Raising this raises the
tendency to equalize the number of shards per index across all nodes in
the cluster.
`cluster.routing.allocation.balance.threshold`::
Minimal optimization value of operations that should be performed (non
negative float). Defaults to `1.0f`. Raising this will cause the cluster
to be less aggressive about optimizing the shard balance.
NOTE: Regardless of the result of the balancing algorithm, rebalancing might
not be allowed due to forced awareness or allocation filtering.
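As an illustrative sketch, these factors can be updated dynamically; the
values here are arbitrary and not recommendations:
[source,js]
--------------------------------------------------
PUT /_cluster/settings
{
    "transient" : {
        "cluster.routing.allocation.balance.shard" : 0.30,
        "cluster.routing.allocation.balance.index" : 0.70
    }
}
--------------------------------------------------
// AUTOSENSE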

View File

@ -1,69 +1,51 @@
[[modules-gateway]]
== Gateway
== Local Gateway
The gateway module allows one to store the state of the cluster meta
data across full cluster restarts. The cluster meta data mainly holds
all the indices created with their respective (index level) settings and
explicit type mappings.
Each time the cluster meta data changes (for example, when an index is
added or deleted), those changes will be persisted using the gateway.
When the cluster first starts up, the state will be read from the
gateway and applied.
The gateway set on the node level will automatically control the index gateway
that will be used. For example, if the `local` gateway is used (the default),
then each index created on the node will automatically use its own respective
index level `local` gateway.
The default gateway used is the
<<modules-gateway-local,local>> gateway.
[float]
[[recover-after]]
=== Recovery After Nodes / Time
In many cases, the actual cluster meta data should only be recovered
after specific nodes have started in the cluster, or a timeout has
passed. This is handy when restarting the cluster, and each node local
index storage still exists to be reused and not recovered from the
gateway (which reduces the time it takes to recover from the gateway).
The `gateway.recover_after_nodes` setting (which accepts a number)
controls after how many data and master eligible nodes within the
cluster recovery will start. The `gateway.recover_after_data_nodes` and
`gateway.recover_after_master_nodes` settings work in a similar fashion,
except they consider only the number of data nodes and only the number
of master nodes respectively. The `gateway.recover_after_time` setting
(which accepts a time value) sets the time to wait till recovery happens
once all `gateway.recover_after...nodes` conditions are met.
The `gateway.expected_nodes` setting allows setting how many data and master
eligible nodes are expected to be in the cluster, and once met, the
`gateway.recover_after_time` is ignored and recovery starts.
Setting `gateway.expected_nodes` also defaults `gateway.recover_after_time` to `5m`. The `gateway.expected_data_nodes` and `gateway.expected_master_nodes`
settings are also supported. For example setting:
[source,js]
--------------------------------------------------
gateway:
    recover_after_time: 5m
    expected_nodes: 2
--------------------------------------------------
in an expected 2-node cluster will cause recovery to start 5 minutes
after the first node is up, but once there are 2 nodes in the cluster,
recovery will begin immediately (without waiting).
Note, once the meta data has been recovered from the gateway (which
indices to create, mappings and so on), then this setting is no longer
effective until the next full restart of the cluster.
Operations are blocked while the cluster meta data has not been
recovered in order not to mix with the actual cluster meta data that
will be recovered once the conditions have been met.
include::gateway/local.asciidoc[]
The local gateway module stores the cluster state and shard data across full
cluster restarts.
The `none` gateway option was removed in Elasticsearch 2.0.
The following _static_ settings, which must be set on every data node in the
cluster, control how long nodes should wait before they try to recover any
shards which are stored locally:
`gateway.expected_nodes`::
The number of (data or master) nodes that are expected to be in the cluster.
Recovery of local shards will start as soon as the expected number of
nodes have joined the cluster. Defaults to `0`
`gateway.expected_master_nodes`::
The number of master nodes that are expected to be in the cluster.
Recovery of local shards will start as soon as the expected number of
master nodes have joined the cluster. Defaults to `0`
`gateway.expected_data_nodes`::
The number of data nodes that are expected to be in the cluster.
Recovery of local shards will start as soon as the expected number of
data nodes have joined the cluster. Defaults to `0`
`gateway.recover_after_time`::
If the expected number of nodes is not achieved, the recovery process waits
for the configured amount of time before trying to recover regardless.
Defaults to `5m` if one of the `expected_nodes` settings is configured.
Once the `recover_after_time` duration has timed out, recovery will start
as long as the following conditions are met:
`gateway.recover_after_nodes`::
Recover as long as this many data or master nodes have joined the cluster.
`gateway.recover_after_master_nodes`::
Recover as long as this many master nodes have joined the cluster.
`gateway.recover_after_data_nodes`::
Recover as long as this many data nodes have joined the cluster.
NOTE: These settings only take effect on a full cluster restart.

View File

@ -1,56 +0,0 @@
[[modules-gateway-local]]
=== Local Gateway
The local gateway allows for recovery of the full cluster state and
indices from the local storage of each node, and does not require a
common node level shared storage.
Note, different from shared gateway types, persistence to the local
gateway is *not* done in an asynchronous manner. Once an operation is
performed, the data is there for the local gateway to recover it in case
of full cluster failure.
It is important to configure the `gateway.recover_after_nodes` setting
to include most of the expected nodes to be started after a full cluster
restart. This will ensure that the latest cluster state is recovered.
For example:
[source,js]
--------------------------------------------------
gateway:
recover_after_nodes: 3
expected_nodes: 5
--------------------------------------------------
[float]
==== Dangling indices
When a node joins the cluster, any shards/indices stored in its local `data/`
directory which do not already exist in the cluster will be imported into the
cluster by default. This functionality has two purposes:
1. If a new master node is started which is unaware of the other indices in
the cluster, adding the old nodes will cause the old indices to be
imported, instead of being deleted.
2. An old index can be added to an existing cluster by copying it to the
`data/` directory of a new node, starting the node and letting it join
the cluster. Once the index has been replicated to other nodes in the
cluster, the new node can be shut down and removed.
The import of dangling indices can be controlled with the
`gateway.auto_import_dangled` setting, which accepts:
[horizontal]
`yes`::
Import dangling indices into the cluster (default).
`close`::
Import dangling indices into the cluster state, but leave them closed.
`no`::
Delete dangling indices after `gateway.dangling_timeout`, which
defaults to 2 hours.

View File

@ -1,66 +1,50 @@
[[modules-indices]]
== Indices
The indices module allows control over settings that are globally managed
for all indices.
[float]
[[buffer]]
=== Indexing Buffer
The indexing buffer setting allows control over how much memory will be
allocated for the indexing process. It is a global setting that bubbles
down to all the different shards allocated on a specific node.
The `indices.memory.index_buffer_size` accepts either a percentage or a
byte size value. It defaults to `10%`, meaning that `10%` of the total
memory allocated to a node will be used as the indexing buffer size.
This amount is then divided between all the different shards. Also, if
percentage is used, it is possible to set `min_index_buffer_size` (defaults to
`48mb`) and `max_index_buffer_size` (defaults to unbounded).
The `indices.memory.min_shard_index_buffer_size` allows setting a hard
lower limit for the memory allocated per shard for its own indexing
buffer. It defaults to `4mb`.
[float]
[[indices-ttl]]
=== TTL interval
You can dynamically set the `indices.ttl.interval`, which controls how
often expired documents will be automatically deleted. The default value
is 60s.
The deletion orders are processed in bulk. You can set
`indices.ttl.bulk_size` to fit your needs. The default value is 10000.
See also <<mapping-ttl-field>>.
[float]
[[recovery]]
=== Recovery
The following settings can be set to manage the recovery policy:
[horizontal]
`indices.recovery.concurrent_streams`::
defaults to `3`.
`indices.recovery.concurrent_small_file_streams`::
defaults to `2`.
`indices.recovery.file_chunk_size`::
defaults to `512kb`.
`indices.recovery.translog_ops`::
defaults to `1000`.
`indices.recovery.translog_size`::
defaults to `512kb`.
`indices.recovery.compress`::
defaults to `true`.
`indices.recovery.max_bytes_per_sec`::
defaults to `40mb`.
The indices module controls index-related settings that are globally managed
for all indices, rather than being configurable at a per-index level.
Available settings include:
<<circuit-breaker,Circuit breaker>>::
Circuit breakers set limits on memory usage to avoid out of memory exceptions.
<<modules-fielddata,Fielddata cache>>::
Set limits on the amount of heap used by the in-memory fielddata cache.
<<filter-cache,Node filter cache>>::
Configure the amount of heap used to cache filter results.
<<indexing-buffer,Indexing buffer>>::
Control the size of the buffer allocated to the indexing process.
<<shard-query-cache,Shard query cache>>::
Control the behaviour of the shard-level query cache.
<<recovery,Recovery>>::
Control the resource limits on the shard recovery process.
<<indices-ttl,TTL interval>>::
Control how expired documents are removed.
include::indices/circuit_breaker.asciidoc[]
include::indices/fielddata.asciidoc[]
include::indices/filter_cache.asciidoc[]
include::indices/indexing_buffer.asciidoc[]
include::indices/query-cache.asciidoc[]
include::indices/recovery.asciidoc[]
include::indices/ttl_interval.asciidoc[]

View File

@ -0,0 +1,56 @@
[[circuit-breaker]]
=== Circuit Breaker
Elasticsearch contains multiple circuit breakers used to prevent operations from
causing an OutOfMemoryError. Each breaker specifies a limit for how much memory
it can use. Additionally, there is a parent-level breaker that specifies the
total amount of memory that can be used across all breakers.
These settings can be dynamically updated on a live cluster with the
<<cluster-update-settings,cluster-update-settings>> API.
[[parent-circuit-breaker]]
[float]
==== Parent circuit breaker
The parent-level breaker can be configured with the following setting:
`indices.breaker.total.limit`::
Starting limit for overall parent breaker, defaults to 70% of JVM heap.
[[fielddata-circuit-breaker]]
[float]
==== Field data circuit breaker
The field data circuit breaker allows Elasticsearch to estimate the amount of
memory a field will require to be loaded into memory. It can then prevent the
field data from being loaded by raising an exception. By default the limit is configured
to 60% of the maximum JVM heap. It can be configured with the following
parameters:
`indices.breaker.fielddata.limit`::
Limit for fielddata breaker, defaults to 60% of JVM heap
`indices.breaker.fielddata.overhead`::
A constant that all field data estimations are multiplied with to determine a
final estimation. Defaults to 1.03
[[request-circuit-breaker]]
[float]
==== Request circuit breaker
The request circuit breaker allows Elasticsearch to prevent per-request data
structures (for example, memory used for calculating aggregations during a
request) from exceeding a certain amount of memory.
`indices.breaker.request.limit`::
Limit for request breaker, defaults to 40% of JVM heap
`indices.breaker.request.overhead`::
A constant that all request estimations are multiplied with to determine a
final estimation. Defaults to 1
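As a minimal sketch (the value is arbitrary), a breaker limit can be updated
on a live cluster:
[source,js]
--------------------------------------------------
PUT /_cluster/settings
{
    "persistent" : {
        "indices.breaker.fielddata.limit" : "55%"
    }
}
--------------------------------------------------
// AUTOSENSE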

View File

@ -0,0 +1,37 @@
[[modules-fielddata]]
=== Fielddata
The field data cache is used mainly when sorting on or computing aggregations
on a field. It loads all the field values to memory in order to provide fast
document based access to those values. The field data cache can be
expensive to build for a field, so it's recommended to have enough memory
to allocate it, and to keep it loaded.
The amount of memory used for the field
data cache can be controlled using `indices.fielddata.cache.size`. Note:
reloading field data that does not fit into your cache will be expensive
and perform poorly.
`indices.fielddata.cache.size`::
The max size of the field data cache, eg `30%` of node heap space, or an
absolute value, eg `12GB`. Defaults to unbounded. Also see
<<fielddata-circuit-breaker>>.
`indices.fielddata.cache.expire`::
experimental[] A time based setting that expires field data after a
certain time of inactivity. Defaults to `-1`. For example, can be set to
`5m` for a 5 minute expiry.
NOTE: These are static settings which must be configured on every data node in
the cluster.
[float]
[[fielddata-monitoring]]
==== Monitoring field data
You can monitor memory usage for field data, as well as the field data circuit
breaker, using the
<<cluster-nodes-stats,Nodes Stats API>>.
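For example, an illustrative request that returns per-field fielddata memory
usage for every node:
[source,js]
--------------------------------------------------
GET /_nodes/stats/indices/fielddata?fields=*
--------------------------------------------------
// AUTOSENSE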

View File

@ -0,0 +1,16 @@
[[filter-cache]]
=== Node Filter Cache
The filter cache is responsible for caching the results of filters (used in
the query). There is one filter cache per node that is shared by all shards.
The cache implements an LRU eviction policy: when a cache becomes full, the
least recently used data is evicted to make way for new data.
The following setting is _static_ and must be configured on every data node in
the cluster:
`indices.cache.filter.size`::
Controls the memory size for the filter cache, defaults to `10%`. Accepts
either a percentage value, like `30%`, or an exact value, like `512mb`.
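For example, a minimal `elasticsearch.yml` sketch (the value is arbitrary):
[source,yaml]
--------------------------------------------------
indices.cache.filter.size: 15%
--------------------------------------------------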

View File

@ -0,0 +1,32 @@
[[indexing-buffer]]
=== Indexing Buffer
The indexing buffer is used to store newly indexed documents. When it fills
up, the documents in the buffer are written to a segment on disk. It is divided
between all shards on the node.
The following settings are _static_ and must be configured on every data node
in the cluster:
`indices.memory.index_buffer_size`::
Accepts either a percentage or a byte size value. It defaults to `10%`,
meaning that `10%` of the total heap allocated to a node will be used as the
indexing buffer size.
`indices.memory.min_index_buffer_size`::
If the `index_buffer_size` is specified as a percentage, then this
setting can be used to specify an absolute minimum. Defaults to `48mb`.
`indices.memory.max_index_buffer_size`::
If the `index_buffer_size` is specified as a percentage, then this
setting can be used to specify an absolute maximum. Defaults to unbounded.
`indices.memory.min_shard_index_buffer_size`::
Sets a hard lower limit for the memory allocated per shard for its own
indexing buffer. Defaults to `4mb`.
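For example, a minimal `elasticsearch.yml` sketch (the values are
illustrative, not recommendations):
[source,yaml]
--------------------------------------------------
indices.memory.index_buffer_size: 20%
indices.memory.min_index_buffer_size: 96mb
--------------------------------------------------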

View File

@ -1,5 +1,5 @@
[[index-modules-shard-query-cache]]
== Shard query cache
[[shard-query-cache]]
=== Shard query cache
When a search request is run against an index or against many indices, each
involved shard executes the search locally and returns its local results to
@ -13,7 +13,7 @@ use case, where only the most recent index is being actively updated --
results from older indices will be served directly from the cache.
[IMPORTANT]
==================================
===================================
For now, the query cache will only cache the results of search requests
where `size=0`, so it will not cache `hits`,
@ -21,10 +21,10 @@ but it will cache `hits.total`, <<search-aggregations,aggregations>>, and
<<search-suggesters,suggestions>>.
Queries that use `now` (see <<date-math>>) cannot be cached.
==================================
===================================
[float]
=== Cache invalidation
==== Cache invalidation
The cache is smart -- it keeps the same _near real-time_ promise as uncached
search.
@ -46,7 +46,7 @@ curl -XPOST 'localhost:9200/kimchy,elasticsearch/_cache/clear?query_cache=true'
------------------------
[float]
=== Enabling caching by default
==== Enabling caching by default
The cache is not enabled by default, but can be enabled when creating a new
index as follows:
@ -73,7 +73,7 @@ curl -XPUT localhost:9200/my_index/_settings -d'
-----------------------------
[float]
=== Enabling caching per request
==== Enabling caching per request
The `query_cache` query-string parameter can be used to enable or disable
caching on a *per-query* basis. If set, it overrides the index-level setting:
@ -99,7 +99,7 @@ it uses a random function or references the current time) you should set the
`query_cache` flag to `false` to disable caching for that request.
[float]
=== Cache key
==== Cache key
The whole JSON body is used as the cache key. This means that if the JSON
changes -- for instance if keys are output in a different order -- then the
@ -110,7 +110,7 @@ keys are always emitted in the same order. This canonical mode can be used in
the application to ensure that a request is always serialized in the same way.
[float]
=== Cache settings
==== Cache settings
The cache is managed at the node level, and has a default maximum size of `1%`
of the heap. This can be changed in the `config/elasticsearch.yml` file with:
@ -126,7 +126,7 @@ stale results are automatically invalidated when the index is refreshed. This
setting is provided for completeness' sake only.
[float]
=== Monitoring cache usage
==== Monitoring cache usage
The size of the cache (in bytes) and the number of evictions can be viewed
by index, with the <<indices-stats,`indices-stats`>> API:

View File

@ -0,0 +1,28 @@
[[recovery]]
=== Indices Recovery
The following _expert_ settings can be set to manage the recovery policy.
`indices.recovery.concurrent_streams`::
Defaults to `3`.
`indices.recovery.concurrent_small_file_streams`::
Defaults to `2`.
`indices.recovery.file_chunk_size`::
Defaults to `512kb`.
`indices.recovery.translog_ops`::
Defaults to `1000`.
`indices.recovery.translog_size`::
Defaults to `512kb`.
`indices.recovery.compress`::
Defaults to `true`.
`indices.recovery.max_bytes_per_sec`::
Defaults to `40mb`.
These settings can be dynamically updated on a live cluster with the
<<cluster-update-settings,cluster-update-settings>> API.
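For instance, a minimal sketch of raising the recovery throttle on a live
cluster (the value is arbitrary):
[source,js]
--------------------------------------------------
PUT /_cluster/settings
{
    "transient" : {
        "indices.recovery.max_bytes_per_sec" : "80mb"
    }
}
--------------------------------------------------
// AUTOSENSE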

View File

@ -0,0 +1,16 @@
[[indices-ttl]]
=== TTL interval
Documents that have a <<mapping-ttl-field,`ttl`>> value set need to be deleted
once they have expired. How and how often they are deleted is controlled by
the following dynamic cluster settings:
`indices.ttl.interval`::
How often the deletion process runs. Defaults to `60s`.
`indices.ttl.bulk_size`::
The deletions are processed with a <<docs-bulk,bulk request>>.
The number of deletions processed can be configured with
this setting. Defaults to `10000`.
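As an illustrative sketch (the interval value is arbitrary), these can be
updated on a live cluster:
[source,js]
--------------------------------------------------
PUT /_cluster/settings
{
    "transient" : {
        "indices.ttl.interval" : "120s"
    }
}
--------------------------------------------------
// AUTOSENSE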

View File

@ -22,7 +22,7 @@ Installing plugins typically take the following form:
[source,shell]
-----------------------------------
plugin --install <org>/<user/component>/<version>
bin/plugin --install <org>/<user/component>/<version>
-----------------------------------
The plugins will be

View File

@ -9,7 +9,6 @@ of discarded.
There are several thread pools, but the important ones include:
[horizontal]
`index`::
For index/delete operations. Defaults to `fixed`
with a size of `# of available processors`,

View File

@ -40,9 +40,14 @@ creating composite queries.
|`flags` |Flags specifying which features of the `simple_query_string` to
enable. Defaults to `ALL`.
|`lowercase_expanded_terms` | Whether terms of prefix and fuzzy queries are to
|`lowercase_expanded_terms` | Whether terms of prefix and fuzzy queries should
be automatically lower-cased or not (since they are not analyzed). Defaults to
true.
`true`.
|`analyze_wildcard` | Whether terms of prefix queries should be automatically
analyzed or not. If `true` a best effort will be made to analyze the prefix. However,
some analyzers will not be able to provide meaningful results
based just on the prefix of a term. Defaults to `false`.
|`locale` | Locale that should be used for string conversions.
Defaults to `ROOT`.

View File

@ -73,7 +73,7 @@ And here is a sample response:
Set to `true` or `false` to enable or disable the caching
of search results for requests where `size` is 0, ie
aggregations and suggestions (no top hits returned).
See <<index-modules-shard-query-cache>>.
See <<shard-query-cache>>.
`terminate_after`::

View File

@ -44,6 +44,32 @@ import java.util.Map;
import static org.elasticsearch.action.ValidateActions.addValidationError;
import static org.elasticsearch.search.Scroll.readScroll;
/**
 * Creates a new {@link DeleteByQueryRequest}. Since Elasticsearch 2.0.0, delete-by-query has moved into a plugin
 * and is not part of Elasticsearch core. In contrast to the previous, in-core implementation, delete-by-query now
 * uses scan/scroll and the returned IDs to delete all documents matching the query. This can have performance
 * as well as visibility implications. Delete-by-query now has the following semantics:
 * <ul>
 * <li>it's <tt>non-atomic</tt>, a delete-by-query may fail at any time while some documents matching the query have already been deleted</li>
 * <li>it's <tt>try-once</tt>, a delete-by-query may fail at any time and will not retry its execution. All retry logic is left to the user</li>
 * <li>it's <tt>syntactic sugar</tt>, a delete-by-query is equivalent to a scan/scroll search and corresponding bulk-deletes by ID</li>
 * <li>it's executed on a <tt>point-in-time</tt> snapshot, a delete-by-query will only delete the documents that are visible at the point in time the delete-by-query was started, equivalent to the scan/scroll API</li>
 * <li>it's <tt>consistent</tt>, a delete-by-query will yield consistent results across all replicas of a shard</li>
 * <li>it's <tt>forward-compatible</tt>, a delete-by-query will only send IDs to the shards as deletes such that no queries are stored in the transaction logs that might not be supported in the future</li>
 * <li>its results won't be visible until the user refreshes the index</li>
 * </ul>
 *
 * The main reasons why delete-by-query is now extracted as a plugin are:
 * <ul>
 * <li><tt>forward-compatibility</tt>, the previous implementation was prone to store unsupported queries in the transaction logs which is equivalent to data loss</li>
 * <li><tt>consistency and correctness</tt>, the previous implementation was prone to produce different results on a shard's replicas which can essentially result in a corrupted index</li>
 * <li><tt>resiliency</tt>, the previous implementation could cause OOM errors, merge storms and dramatic slowdowns if used incorrectly</li>
 * </ul>
 *
 * While delete-by-query is a very useful feature, its implementation is very tricky in a system that is based on per-document modifications. The move towards
 * a plugin-based solution was mainly done to minimize the risk of cluster failures or corrupted indices which were easily possible with the previous implementation.
 * Users that rely on delete-by-query should install the plugin in order to use this functionality.
 */
public class DeleteByQueryRequest extends ActionRequest<DeleteByQueryRequest> implements IndicesRequest.Replaceable {
private String[] indices = Strings.EMPTY_ARRAY;

View File

@ -31,6 +31,10 @@ import org.elasticsearch.index.query.QueryBuilder;
import java.util.Map;
/**
* Creates a new {@link DeleteByQueryRequestBuilder}
* @see DeleteByQueryRequest
*/
public class DeleteByQueryRequestBuilder extends ActionRequestBuilder<DeleteByQueryRequest, DeleteByQueryResponse, DeleteByQueryRequestBuilder> {
private QuerySourceBuilder sourceBuilder;

View File

@ -35,6 +35,7 @@ import static org.elasticsearch.action.search.ShardSearchFailure.readShardSearch
/**
* Delete by query response
* @see DeleteByQueryRequest
*/
public class DeleteByQueryResponse extends ActionResponse implements ToXContent {

View File

@ -37,6 +37,9 @@ import org.elasticsearch.rest.action.support.RestToXContentListener;
import static org.elasticsearch.action.deletebyquery.DeleteByQueryAction.INSTANCE;
import static org.elasticsearch.rest.RestRequest.Method.DELETE;
/**
* @see DeleteByQueryRequest
*/
public class RestDeleteByQueryAction extends BaseRestHandler {
@Inject

View File

@ -72,6 +72,7 @@ public class IndexDeleteByQueryResponseTests extends ElasticsearchTestCase {
@Test
public void testNegativeCounters() {
assumeTrue("assertions must be enable for this test to pass", assertionsEnabled());
try {
new IndexDeleteByQueryResponse("index", -1L, 0L, 0L, 0L);
fail("should have thrown an assertion error concerning the negative counter");
@ -107,6 +108,7 @@ public class IndexDeleteByQueryResponseTests extends ElasticsearchTestCase {
@Test
public void testNegativeIncrements() {
assumeTrue("assertions must be enable for this test to pass", assertionsEnabled());
try {
IndexDeleteByQueryResponse response = new IndexDeleteByQueryResponse();
response.incrementFound(-10L);
@ -163,4 +165,5 @@ public class IndexDeleteByQueryResponseTests extends ElasticsearchTestCase {
assertThat(deserializedResponse.getMissing(), equalTo(response.getMissing()));
assertThat(deserializedResponse.getFailed(), equalTo(response.getFailed()));
}
}

View File

@ -110,7 +110,7 @@
<!-- REST API specifications copied from main Elasticsearch specs
because they are required to execute the REST tests in plugins -->
<testResource>
<directory>${project.basedir}/../../rest-api-spec</directory>
<directory>${elasticsearch.tools.directory}/rest-api-spec</directory>
<targetPath>rest-api-spec</targetPath>
<includes>
<!-- required by the test framework -->

Some files were not shown because too many files have changed in this diff Show More