Merge branch 'master' into pr/16598-register-filter-settings

This commit is contained in:
David Pilato 2016-03-05 11:37:03 +01:00
commit e35032950e
56 changed files with 988 additions and 575 deletions

View File

@ -310,7 +310,7 @@ public class AllocationService extends AbstractComponent {
        }
        // move shards that no longer can be allocated
-       changed |= moveShards(allocation);
+       changed |= shardsAllocators.moveShards(allocation);
        // rebalance
        changed |= shardsAllocators.rebalance(allocation);
@ -327,46 +327,6 @@ public class AllocationService extends AbstractComponent {
        }
    }
private boolean moveShards(RoutingAllocation allocation) {
boolean changed = false;
// create a copy of the shards interleaving between nodes, and check if they can remain
List<ShardRouting> shards = new ArrayList<>();
int index = 0;
boolean found = true;
final RoutingNodes routingNodes = allocation.routingNodes();
while (found) {
found = false;
for (RoutingNode routingNode : routingNodes) {
if (index >= routingNode.size()) {
continue;
}
found = true;
shards.add(routingNode.get(index));
}
index++;
}
for (int i = 0; i < shards.size(); i++) {
ShardRouting shardRouting = shards.get(i);
// we can only move started shards...
if (!shardRouting.started()) {
continue;
}
final RoutingNode routingNode = routingNodes.node(shardRouting.currentNodeId());
Decision decision = allocation.deciders().canRemain(shardRouting, routingNode, allocation);
if (decision.type() == Decision.Type.NO) {
logger.debug("[{}][{}] allocated on [{}], but can no longer be allocated on it, moving...", shardRouting.index(), shardRouting.id(), routingNode.node());
boolean moved = shardsAllocators.move(shardRouting, routingNode, allocation);
if (!moved) {
logger.debug("[{}][{}] can't move", shardRouting.index(), shardRouting.id());
} else {
changed = true;
}
}
}
return changed;
}
    private boolean electPrimariesAndUnassignedDanglingReplicas(RoutingAllocation allocation) {
        boolean changed = false;
        final RoutingNodes routingNodes = allocation.routingNodes();

View File

@ -43,6 +43,7 @@ import org.elasticsearch.common.settings.Setting.Property;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.gateway.PriorityComparator;

+import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
@ -50,6 +51,7 @@ import java.util.HashMap;
import java.util.HashSet;
import java.util.IdentityHashMap;
import java.util.Iterator;
+import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.function.Predicate;
@ -124,9 +126,9 @@ public class BalancedShardsAllocator extends AbstractComponent implements Shards
    }

    @Override
-   public boolean move(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) {
+   public boolean moveShards(RoutingAllocation allocation) {
        final Balancer balancer = new Balancer(logger, allocation, weightFunction, threshold);
-       return balancer.move(shardRouting, node);
+       return balancer.moveShards();
    }
/** /**
@ -494,56 +496,93 @@ public class BalancedShardsAllocator extends AbstractComponent implements Shards
        }

        /**
-         * This function executes a move operation moving the given shard from
-         * the given node to the minimal eligible node with respect to the
-         * weight function. Iff the shard is moved the shard will be set to
+         * Move started shards that can not be allocated to a node anymore
+         *
+         * For each shard to be moved this function executes a move operation
+         * to the minimal eligible node with respect to the
+         * weight function. If a shard is moved the shard will be set to
         * {@link ShardRoutingState#RELOCATING} and a shadow instance of this
         * shard is created with an incremented version in the state
         * {@link ShardRoutingState#INITIALIZING}.
         *
-         * @return <code>true</code> iff the shard has successfully been moved.
+         * @return <code>true</code> if the allocation has changed, otherwise <code>false</code>
         */
-        public boolean move(ShardRouting shard, RoutingNode node ) {
-            if (nodes.isEmpty() || !shard.started()) {
-                /* with no nodes or a not started shard this is pointless */
-                return false;
-            }
-            if (logger.isTraceEnabled()) {
-                logger.trace("Try moving shard [{}] from [{}]", shard, node);
-            }
-            final RoutingNodes.UnassignedShards unassigned = routingNodes.unassigned();
-            boolean changed = initialize(routingNodes, unassigned);
-            if (!changed) {
-                final ModelNode sourceNode = nodes.get(node.nodeId());
-                assert sourceNode != null;
-                final NodeSorter sorter = newNodeSorter();
-                sorter.reset(shard.getIndexName());
-                final ModelNode[] nodes = sorter.modelNodes;
-                assert sourceNode.containsShard(shard);
-                /*
-                 * the sorter holds the minimum weight node first for the shards index.
-                 * We now walk through the nodes until we find a node to allocate the shard.
-                 * This is not guaranteed to be balanced after this operation we still try best effort to
-                 * allocate on the minimal eligible node.
-                 */
-                for (ModelNode currentNode : nodes) {
-                    if (currentNode.getNodeId().equals(node.nodeId())) {
-                        continue;
-                    }
-                    RoutingNode target = currentNode.getRoutingNode(routingNodes);
-                    Decision allocationDecision = allocation.deciders().canAllocate(shard, target, allocation);
-                    Decision rebalanceDecision = allocation.deciders().canRebalance(shard, allocation);
-                    Decision decision = new Decision.Multi().add(allocationDecision).add(rebalanceDecision);
-                    if (decision.type() == Type.YES) { // TODO maybe we can respect throttling here too?
-                        sourceNode.removeShard(shard);
-                        ShardRouting targetRelocatingShard = routingNodes.relocate(shard, target.nodeId(), allocation.clusterInfo().getShardSize(shard, ShardRouting.UNAVAILABLE_EXPECTED_SHARD_SIZE));
-                        currentNode.addShard(targetRelocatingShard, decision);
-                        if (logger.isTraceEnabled()) {
-                            logger.trace("Moved shard [{}] to node [{}]", shard, currentNode.getNodeId());
-                        }
-                        changed = true;
-                        break;
-                    }
-                }
-            }
+        public boolean moveShards() {
+            if (nodes.isEmpty()) {
+                /* with no nodes this is pointless */
+                return false;
+            }
+            // Create a copy of the started shards interleaving between nodes, and check if they can remain. In the presence of throttling
+            // shard movements, the goal of this iteration order is to achieve a fairer movement of shards from the nodes that are
+            // offloading the shards.
+            List<ShardRouting> shards = new ArrayList<>();
+            int index = 0;
+            boolean found = true;
+            while (found) {
+                found = false;
+                for (RoutingNode routingNode : routingNodes) {
+                    if (index >= routingNode.size()) {
+                        continue;
+                    }
+                    found = true;
+                    ShardRouting shardRouting = routingNode.get(index);
+                    // we can only move started shards...
+                    if (shardRouting.started()) {
+                        shards.add(shardRouting);
+                    }
+                }
+                index++;
+            }
+            if (shards.isEmpty()) {
+                return false;
+            }
+            final RoutingNodes.UnassignedShards unassigned = routingNodes.unassigned();
+            boolean changed = initialize(routingNodes, unassigned);
+            if (changed == false) {
+                final NodeSorter sorter = newNodeSorter();
+                final ModelNode[] modelNodes = sorter.modelNodes;
+                for (ShardRouting shardRouting : shards) {
+                    final ModelNode sourceNode = nodes.get(shardRouting.currentNodeId());
+                    assert sourceNode != null && sourceNode.containsShard(shardRouting);
+                    final RoutingNode routingNode = sourceNode.getRoutingNode(routingNodes);
+                    Decision decision = allocation.deciders().canRemain(shardRouting, routingNode, allocation);
+                    if (decision.type() == Decision.Type.NO) {
+                        logger.debug("[{}][{}] allocated on [{}], but can no longer be allocated on it, moving...", shardRouting.index(), shardRouting.id(), routingNode.node());
+                        sorter.reset(shardRouting.getIndexName());
+                        /*
+                         * the sorter holds the minimum weight node first for the shards index.
+                         * We now walk through the nodes until we find a node to allocate the shard.
+                         * This is not guaranteed to be balanced after this operation we still try best effort to
+                         * allocate on the minimal eligible node.
+                         */
+                        boolean moved = false;
+                        for (ModelNode currentNode : modelNodes) {
+                            if (currentNode == sourceNode) {
+                                continue;
+                            }
+                            RoutingNode target = currentNode.getRoutingNode(routingNodes);
+                            Decision allocationDecision = allocation.deciders().canAllocate(shardRouting, target, allocation);
+                            Decision rebalanceDecision = allocation.deciders().canRebalance(shardRouting, allocation);
+                            if (allocationDecision.type() == Type.YES && rebalanceDecision.type() == Type.YES) { // TODO maybe we can respect throttling here too?
+                                Decision sourceDecision = sourceNode.removeShard(shardRouting);
+                                ShardRouting targetRelocatingShard = routingNodes.relocate(shardRouting, target.nodeId(), allocation.clusterInfo().getShardSize(shardRouting, ShardRouting.UNAVAILABLE_EXPECTED_SHARD_SIZE));
+                                // re-add (now relocating shard) to source node
+                                sourceNode.addShard(shardRouting, sourceDecision);
+                                Decision targetDecision = new Decision.Multi().add(allocationDecision).add(rebalanceDecision);
+                                currentNode.addShard(targetRelocatingShard, targetDecision);
+                                if (logger.isTraceEnabled()) {
+                                    logger.trace("Moved shard [{}] to node [{}]", shardRouting, routingNode.node());
+                                }
+                                moved = true;
+                                changed = true;
+                                break;
+                            }
+                        }
+                        if (moved == false) {
+                            logger.debug("[{}][{}] can't move", shardRouting.index(), shardRouting.id());
+                        }
+                    }
+                }
+            }
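
To make the interleaved iteration order above concrete, here is a small, self-contained Java sketch of the same round-robin collection over plain lists (the lists stand in for `RoutingNodes`; the names and data are made up, not taken from the commit):

```java
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class InterleaveSketch {
    // Collect one element per node per round, so no single node's shards dominate the
    // front of the list when later processing (e.g. throttled moves) stops early.
    static List<String> interleave(List<List<String>> nodes) {
        List<String> out = new ArrayList<>();
        int index = 0;
        boolean found = true;
        while (found) {
            found = false;
            for (List<String> node : nodes) {
                if (index >= node.size()) {
                    continue;
                }
                found = true;
                out.add(node.get(index));
            }
            index++;
        }
        return out;
    }

    public static void main(String[] args) {
        List<List<String>> nodes = Arrays.asList(
                Arrays.asList("n1-s0", "n1-s1", "n1-s2"),
                Arrays.asList("n2-s0"),
                Arrays.asList("n3-s0", "n3-s1"));
        // Prints [n1-s0, n2-s0, n3-s0, n1-s1, n3-s1, n1-s2]
        System.out.println(interleave(nodes));
    }
}
```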

View File

@ -19,7 +19,6 @@
package org.elasticsearch.cluster.routing.allocation.allocator;

-import org.elasticsearch.cluster.routing.RoutingNode;
import org.elasticsearch.cluster.routing.ShardRouting;
import org.elasticsearch.cluster.routing.ShardRoutingState;
import org.elasticsearch.cluster.routing.allocation.FailedRerouteAllocation;
@ -36,22 +35,22 @@ import org.elasticsearch.cluster.routing.allocation.StartedRerouteAllocation;
public interface ShardsAllocator {

    /**
     * Applies changes on started nodes based on the implemented algorithm. For example if a
     * shard has changed to {@link ShardRoutingState#STARTED} from {@link ShardRoutingState#RELOCATING}
     * this allocator might apply some cleanups on the node that used to hold the shard.
     * @param allocation all started {@link ShardRouting shards}
     */
    void applyStartedShards(StartedRerouteAllocation allocation);

    /**
     * Applies changes on failed nodes based on the implemented algorithm.
     * @param allocation all failed {@link ShardRouting shards}
     */
    void applyFailedShards(FailedRerouteAllocation allocation);

    /**
     * Assign all unassigned shards to nodes
     *
     * @param allocation current node allocation
     * @return <code>true</code> if the allocation has changed, otherwise <code>false</code>
     */
@ -59,19 +58,17 @@ public interface ShardsAllocator {
    /**
     * Rebalancing number of shards on all nodes
     *
     * @param allocation current node allocation
     * @return <code>true</code> if the allocation has changed, otherwise <code>false</code>
     */
    boolean rebalance(RoutingAllocation allocation);

    /**
-     * Moves a shard from the given node to other node.
+     * Move started shards that can not be allocated to a node anymore
     *
-     * @param shardRouting the shard to move
-     * @param node A node containing the shard
     * @param allocation current node allocation
     * @return <code>true</code> if the allocation has changed, otherwise <code>false</code>
     */
-    boolean move(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation);
+    boolean moveShards(RoutingAllocation allocation);
}

View File

@ -19,8 +19,6 @@
package org.elasticsearch.cluster.routing.allocation.allocator;

-import org.elasticsearch.cluster.routing.RoutingNode;
-import org.elasticsearch.cluster.routing.ShardRouting;
import org.elasticsearch.cluster.routing.allocation.FailedRerouteAllocation;
import org.elasticsearch.cluster.routing.allocation.RoutingAllocation;
import org.elasticsearch.cluster.routing.allocation.StartedRerouteAllocation;

@ -96,7 +94,7 @@ public class ShardsAllocators extends AbstractComponent implements ShardsAllocat
    }

    @Override
-    public boolean move(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) {
-        return allocator.move(shardRouting, node, allocation);
+    public boolean moveShards(RoutingAllocation allocation) {
+        return allocator.moveShards(allocation);
    }
}

View File

@ -98,7 +98,6 @@ class DocumentParser implements Closeable {
        }
        reverseOrder(context);
-        applyDocBoost(context);
        ParsedDocument doc = parsedDocument(source, context, update(context, mapping));
        // reset the context to free up memory
@ -186,24 +185,6 @@ class DocumentParser implements Closeable {
        }
    }
private static void applyDocBoost(ParseContext.InternalParseContext context) {
// apply doc boost
if (context.docBoost() != 1.0f) {
Set<String> encounteredFields = new HashSet<>();
for (ParseContext.Document doc : context.docs()) {
encounteredFields.clear();
for (IndexableField field : doc) {
if (field.fieldType().indexOptions() != IndexOptions.NONE && !field.fieldType().omitNorms()) {
if (!encounteredFields.contains(field.name())) {
((Field) field).setBoost(context.docBoost() * field.boost());
encounteredFields.add(field.name());
}
}
}
}
}
}
    private static ParsedDocument parsedDocument(SourceToParse source, ParseContext.InternalParseContext context, Mapping update) {
        return new ParsedDocument(
            context.uid(),

View File

@ -303,7 +303,8 @@ public abstract class FieldMapper extends Mapper implements Cloneable {
        for (Field field : fields) {
            if (!customBoost()
                    // don't set boosts eg. on dv fields
-                    && field.fieldType().indexOptions() != IndexOptions.NONE) {
+                    && field.fieldType().indexOptions() != IndexOptions.NONE
+                    && Version.indexCreated(context.indexSettings()).before(Version.V_5_0_0)) {
                field.setBoost(fieldType().boost());
            }
            context.doc().add(field);

View File

@ -32,7 +32,9 @@ import org.apache.lucene.search.Query;
import org.apache.lucene.search.RegexpQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TermRangeQuery;
+import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.Version;
import org.elasticsearch.action.fieldstats.FieldStats;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.Strings;

@ -398,7 +400,12 @@ public abstract class MappedFieldType extends FieldType {
    }

    public Query termQuery(Object value, @Nullable QueryShardContext context) {
-        return new TermQuery(createTerm(value));
+        TermQuery query = new TermQuery(createTerm(value));
+        if (boost == 1f ||
+                (context != null && context.indexVersionCreated().before(Version.V_5_0_0))) {
+            return query;
+        }
+        return new BoostQuery(query, boost);
    }

    public Query termsQuery(List values, @Nullable QueryShardContext context) {
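
The new `termQuery` above leaves pre-5.0 indices un-boosted and otherwise applies the configured boost at query time by wrapping the term query. A minimal standalone sketch of that query-time wrapping using plain Lucene classes (the field name and boost value are made up, and the index-version check is omitted):

```java
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;

public class QueryTimeBoostSketch {
    // Wrap a term query in a BoostQuery only when a non-default boost is configured.
    static Query termQuery(String field, String value, float boost) {
        TermQuery query = new TermQuery(new Term(field, value));
        if (boost == 1f) {
            return query; // nothing to wrap for the default boost
        }
        return new BoostQuery(query, boost);
    }

    public static void main(String[] args) {
        // e.g. prints (title:elasticsearch)^2.0
        System.out.println(termQuery("title", "elasticsearch", 2.0f));
    }
}
```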

View File

@ -321,16 +321,6 @@ public abstract class ParseContext {
            return in.externalValue();
        }

-        @Override
-        public float docBoost() {
-            return in.docBoost();
-        }
-        @Override
-        public void docBoost(float docBoost) {
-            in.docBoost(docBoost);
-        }
        @Override
        public StringBuilder stringBuilder() {
            return in.stringBuilder();
@ -375,8 +365,6 @@ public abstract class ParseContext {
        private AllEntries allEntries = new AllEntries();
-        private float docBoost = 1.0f;
        private Mapper dynamicMappingsUpdate = null;

        public InternalParseContext(@Nullable Settings indexSettings, DocumentMapperParser docMapperParser, DocumentMapper docMapper, ContentPath path) {
@ -402,7 +390,6 @@ public abstract class ParseContext {
            this.source = source == null ? null : sourceToParse.source();
            this.path.reset();
            this.allEntries = new AllEntries();
-            this.docBoost = 1.0f;
            this.dynamicMappingsUpdate = null;
        }
@ -534,16 +521,6 @@ public abstract class ParseContext {
            return this.allEntries;
        }

-        @Override
-        public float docBoost() {
-            return this.docBoost;
-        }
-        @Override
-        public void docBoost(float docBoost) {
-            this.docBoost = docBoost;
-        }
        /**
         * A string builder that can be used to construct complex names for example.
         * Its better to reuse the.
@ -759,10 +736,6 @@ public abstract class ParseContext {
        return clazz.cast(externalValue());
    }

-    public abstract float docBoost();
-    public abstract void docBoost(float docBoost);
    /**
     * A string builder that can be used to construct complex names for example.
     * Its better to reuse the.

View File

@ -285,7 +285,9 @@ public class ByteFieldMapper extends NumberFieldMapper {
        }
        if (fieldType().indexOptions() != IndexOptions.NONE || fieldType().stored()) {
            CustomByteNumericField field = new CustomByteNumericField(value, fieldType());
-            field.setBoost(boost);
+            if (boost != 1f && Version.indexCreated(context.indexSettings()).before(Version.V_5_0_0)) {
+                field.setBoost(boost);
+            }
            fields.add(field);
        }
        if (fieldType().hasDocValues()) {

View File

@ -513,7 +513,9 @@ public class DateFieldMapper extends NumberFieldMapper {
        if (value != null) {
            if (fieldType().indexOptions() != IndexOptions.NONE || fieldType().stored()) {
                CustomLongNumericField field = new CustomLongNumericField(value, fieldType());
-                field.setBoost(boost);
+                if (boost != 1f && Version.indexCreated(context.indexSettings()).before(Version.V_5_0_0)) {
+                    field.setBoost(boost);
+                }
                fields.add(field);
            }
            if (fieldType().hasDocValues()) {

View File

@ -278,7 +278,9 @@ public class DoubleFieldMapper extends NumberFieldMapper {
        if (fieldType().indexOptions() != IndexOptions.NONE || fieldType().stored()) {
            CustomDoubleNumericField field = new CustomDoubleNumericField(value, fieldType());
-            field.setBoost(boost);
+            if (boost != 1f && Version.indexCreated(context.indexSettings()).before(Version.V_5_0_0)) {
+                field.setBoost(boost);
+            }
            fields.add(field);
        }
        if (fieldType().hasDocValues()) {

View File

@ -290,7 +290,9 @@ public class FloatFieldMapper extends NumberFieldMapper {
        if (fieldType().indexOptions() != IndexOptions.NONE || fieldType().stored()) {
            CustomFloatNumericField field = new CustomFloatNumericField(value, fieldType());
-            field.setBoost(boost);
+            if (boost != 1f && Version.indexCreated(context.indexSettings()).before(Version.V_5_0_0)) {
+                field.setBoost(boost);
+            }
            fields.add(field);
        }
        if (fieldType().hasDocValues()) {

View File

@ -298,7 +298,9 @@ public class IntegerFieldMapper extends NumberFieldMapper {
    protected void addIntegerFields(ParseContext context, List<Field> fields, int value, float boost) {
        if (fieldType().indexOptions() != IndexOptions.NONE || fieldType().stored()) {
            CustomIntegerNumericField field = new CustomIntegerNumericField(value, fieldType());
-            field.setBoost(boost);
+            if (boost != 1f && Version.indexCreated(context.indexSettings()).before(Version.V_5_0_0)) {
+                field.setBoost(boost);
+            }
            fields.add(field);
        }
        if (fieldType().hasDocValues()) {

View File

@ -282,7 +282,9 @@ public class LongFieldMapper extends NumberFieldMapper {
        }
        if (fieldType().indexOptions() != IndexOptions.NONE || fieldType().stored()) {
            CustomLongNumericField field = new CustomLongNumericField(value, fieldType());
-            field.setBoost(boost);
+            if (boost != 1f && Version.indexCreated(context.indexSettings()).before(Version.V_5_0_0)) {
+                field.setBoost(boost);
+            }
            fields.add(field);
        }
        if (fieldType().hasDocValues()) {

View File

@ -290,7 +290,9 @@ public class ShortFieldMapper extends NumberFieldMapper {
        }
        if (fieldType().indexOptions() != IndexOptions.NONE || fieldType().stored()) {
            CustomShortNumericField field = new CustomShortNumericField(value, fieldType());
-            field.setBoost(boost);
+            if (boost != 1f && Version.indexCreated(context.indexSettings()).before(Version.V_5_0_0)) {
+                field.setBoost(boost);
+            }
            fields.add(field);
        }
        if (fieldType().hasDocValues()) {

View File

@ -317,7 +317,9 @@ public class StringFieldMapper extends FieldMapper implements AllFieldMapper.Inc
        if (fieldType().indexOptions() != IndexOptions.NONE || fieldType().stored()) {
            Field field = new Field(fieldType().name(), valueAndBoost.value(), fieldType());
-            field.setBoost(valueAndBoost.boost());
+            if (valueAndBoost.boost() != 1f && Version.indexCreated(context.indexSettings()).before(Version.V_5_0_0)) {
+                field.setBoost(valueAndBoost.boost());
+            }
            fields.add(field);
        }
        if (fieldType().hasDocValues()) {

View File

@ -30,6 +30,7 @@ import org.apache.lucene.spatial.prefix.tree.GeohashPrefixTree;
import org.apache.lucene.spatial.prefix.tree.PackedQuadPrefixTree;
import org.apache.lucene.spatial.prefix.tree.QuadPrefixTree;
import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree;
+import org.elasticsearch.Version;
import org.elasticsearch.common.Explicit;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.geo.GeoUtils;

@ -452,7 +453,8 @@ public class GeoShapeFieldMapper extends FieldMapper {
            return null;
        }
        for (Field field : fields) {
-            if (!customBoost()) {
+            if (!customBoost() &&
+                    fieldType.boost() != 1f && Version.indexCreated(context.indexSettings()).before(Version.V_5_0_0)) {
                field.setBoost(fieldType().boost());
            }
            context.doc().add(field);

View File

@ -27,6 +27,7 @@ import org.apache.lucene.search.Query;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.NumericUtils;
+import org.elasticsearch.Version;
import org.elasticsearch.common.Explicit;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.Numbers;

@ -305,7 +306,9 @@ public class IpFieldMapper extends NumberFieldMapper {
        final long value = ipToLong(ipAsString);
        if (fieldType().indexOptions() != IndexOptions.NONE || fieldType().stored()) {
            CustomLongNumericField field = new CustomLongNumericField(value, fieldType());
-            field.setBoost(fieldType().boost());
+            if (fieldType.boost() != 1f && Version.indexCreated(context.indexSettings()).before(Version.V_5_0_0)) {
+                field.setBoost(fieldType().boost());
+            }
            fields.add(field);
        }
        if (fieldType().hasDocValues()) {

View File

@ -333,9 +333,18 @@ public class TransportService extends AbstractLifecycleComponent<TransportServic
            // callback that an exception happened, but on a different thread since we don't
            // want handlers to worry about stack overflows
            final SendRequestTransportException sendRequestException = new SendRequestTransportException(node, action, e);
-            threadPool.executor(ThreadPool.Names.GENERIC).execute(new Runnable() {
+            threadPool.executor(ThreadPool.Names.GENERIC).execute(new AbstractRunnable() {
                @Override
-                public void run() {
+                public void onRejection(Throwable t) {
+                    // if we get rejected during node shutdown we don't wanna bubble it up
+                    logger.debug("failed to notify response handler on rejection", t);
+                }
+                @Override
+                public void onFailure(Throwable t) {
+                    logger.warn("failed to notify response handler on exception", t);
+                }
+                @Override
+                protected void doRun() throws Exception {
                    holderToNotify.handler().handleException(sendRequestException);
                }
            });
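
The switch from a bare `Runnable` to a rejection-aware task matters during shutdown: once the pool stops accepting work, the notification would otherwise surface as an exception to the caller. A self-contained sketch of the same idea using only the JDK executor API (the `notifyLater` helper and its callback are hypothetical, not the Elasticsearch classes):

```java
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.RejectedExecutionException;

public class RejectionAwareNotify {
    // Submit a notification task, downgrading a rejection during shutdown to a log line
    // instead of letting it bubble up to the caller.
    static void notifyLater(ExecutorService executor, Runnable notifyHandler) {
        try {
            executor.execute(() -> {
                try {
                    notifyHandler.run();
                } catch (Exception e) {
                    System.err.println("failed to notify response handler on exception: " + e);
                }
            });
        } catch (RejectedExecutionException e) {
            // the pool is shutting down; log and move on rather than propagating
            System.err.println("failed to notify response handler on rejection: " + e);
        }
    }

    public static void main(String[] args) {
        ExecutorService pool = Executors.newSingleThreadExecutor();
        notifyLater(pool, () -> System.out.println("handler notified"));
        pool.shutdown();
        notifyLater(pool, () -> System.out.println("never runs")); // rejected, logged instead of thrown
    }
}
```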

View File

@ -477,7 +477,7 @@ public class IndexAliasesIT extends ESIntegTestCase {
            });
        }
        executor.shutdown();
-        boolean done = executor.awaitTermination(10, TimeUnit.SECONDS);
+        boolean done = executor.awaitTermination(20, TimeUnit.SECONDS);
        assertThat(done, equalTo(true));
        if (!done) {
            executor.shutdownNow();

View File

@ -61,7 +61,7 @@ public class ClusterModuleTests extends ModuleTestCase {
            return false;
        }

        @Override
-        public boolean move(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) {
+        public boolean moveShards(RoutingAllocation allocation) {
            return false;
        }
    }

View File

@ -320,7 +320,7 @@ public class BalanceConfigurationTests extends ESAllocationTestCase {
        }

        @Override
-        public boolean move(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) {
+        public boolean moveShards(RoutingAllocation allocation) {
            return false;
        }

View File

@ -19,13 +19,18 @@
package org.elasticsearch.index.mapper.boost;

+import org.apache.lucene.search.BoostQuery;
+import org.apache.lucene.search.TermQuery;
import org.elasticsearch.Version;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.common.compress.CompressedXContent;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentFactory;
+import org.elasticsearch.index.IndexService;
+import org.elasticsearch.index.mapper.DocumentFieldMappers;
import org.elasticsearch.index.mapper.DocumentMapper;
import org.elasticsearch.index.mapper.ParsedDocument;
+import org.elasticsearch.index.query.QueryShardContext;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.test.ESSingleNodeTestCase;
import org.elasticsearch.test.InternalSettingsPlugin;

@ -33,6 +38,7 @@ import org.elasticsearch.test.InternalSettingsPlugin;
import java.util.Collection;

import static org.hamcrest.Matchers.equalTo;
+import static org.hamcrest.Matchers.instanceOf;
public class CustomBoostMappingTests extends ESSingleNodeTestCase {
@ -77,4 +83,87 @@ public class CustomBoostMappingTests extends ESSingleNodeTestCase {
        assertThat(doc.rootDoc().getField("f_field").boost(), equalTo(8.0f));
        assertThat(doc.rootDoc().getField("date_field").boost(), equalTo(9.0f));
    }
public void testBackCompatFieldMappingBoostValues() throws Exception {
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type").startObject("properties")
.startObject("s_field").field("type", "keyword").field("boost", 2.0f).endObject()
.startObject("l_field").field("type", "long").field("boost", 3.0f).startObject("norms").field("enabled", true).endObject().endObject()
.startObject("i_field").field("type", "integer").field("boost", 4.0f).startObject("norms").field("enabled", true).endObject().endObject()
.startObject("sh_field").field("type", "short").field("boost", 5.0f).startObject("norms").field("enabled", true).endObject().endObject()
.startObject("b_field").field("type", "byte").field("boost", 6.0f).startObject("norms").field("enabled", true).endObject().endObject()
.startObject("d_field").field("type", "double").field("boost", 7.0f).startObject("norms").field("enabled", true).endObject().endObject()
.startObject("f_field").field("type", "float").field("boost", 8.0f).startObject("norms").field("enabled", true).endObject().endObject()
.startObject("date_field").field("type", "date").field("boost", 9.0f).startObject("norms").field("enabled", true).endObject().endObject()
.endObject().endObject().endObject().string();
{
IndexService indexService = createIndex("test", BW_SETTINGS);
QueryShardContext context = indexService.newQueryShardContext();
DocumentMapper mapper = indexService.mapperService().documentMapperParser().parse("type", new CompressedXContent(mapping));
DocumentFieldMappers fieldMappers = mapper.mappers();
assertThat(fieldMappers.getMapper("s_field").fieldType().termQuery("0", context), instanceOf(TermQuery.class));
assertThat(fieldMappers.getMapper("l_field").fieldType().termQuery("0", context), instanceOf(TermQuery.class));
assertThat(fieldMappers.getMapper("i_field").fieldType().termQuery("0", context), instanceOf(TermQuery.class));
assertThat(fieldMappers.getMapper("sh_field").fieldType().termQuery("0", context), instanceOf(TermQuery.class));
assertThat(fieldMappers.getMapper("b_field").fieldType().termQuery("0", context), instanceOf(TermQuery.class));
assertThat(fieldMappers.getMapper("d_field").fieldType().termQuery("0", context), instanceOf(TermQuery.class));
assertThat(fieldMappers.getMapper("f_field").fieldType().termQuery("0", context), instanceOf(TermQuery.class));
assertThat(fieldMappers.getMapper("date_field").fieldType().termQuery("0", context), instanceOf(TermQuery.class));
ParsedDocument doc = mapper.parse("test", "type", "1", XContentFactory.jsonBuilder().startObject()
.field("s_field", "s_value")
.field("l_field", 1L)
.field("i_field", 1)
.field("sh_field", 1)
.field("b_field", 1)
.field("d_field", 1)
.field("f_field", 1)
.field("date_field", "20100101")
.endObject().bytes());
assertThat(doc.rootDoc().getField("s_field").boost(), equalTo(2.0f));
assertThat(doc.rootDoc().getField("l_field").boost(), equalTo(3.0f));
assertThat(doc.rootDoc().getField("i_field").boost(), equalTo(4.0f));
assertThat(doc.rootDoc().getField("sh_field").boost(), equalTo(5.0f));
assertThat(doc.rootDoc().getField("b_field").boost(), equalTo(6.0f));
assertThat(doc.rootDoc().getField("d_field").boost(), equalTo(7.0f));
assertThat(doc.rootDoc().getField("f_field").boost(), equalTo(8.0f));
assertThat(doc.rootDoc().getField("date_field").boost(), equalTo(9.0f));
}
{
IndexService indexService = createIndex("text");
QueryShardContext context = indexService.newQueryShardContext();
DocumentMapper mapper = indexService.mapperService().documentMapperParser().parse("type", new CompressedXContent(mapping));
DocumentFieldMappers fieldMappers = mapper.mappers();
assertThat(fieldMappers.getMapper("s_field").fieldType().termQuery("0", context), instanceOf(BoostQuery.class));
assertThat(fieldMappers.getMapper("l_field").fieldType().termQuery("0", context), instanceOf(BoostQuery.class));
assertThat(fieldMappers.getMapper("i_field").fieldType().termQuery("0", context), instanceOf(BoostQuery.class));
assertThat(fieldMappers.getMapper("sh_field").fieldType().termQuery("0", context), instanceOf(BoostQuery.class));
assertThat(fieldMappers.getMapper("b_field").fieldType().termQuery("0", context), instanceOf(BoostQuery.class));
assertThat(fieldMappers.getMapper("d_field").fieldType().termQuery("0", context), instanceOf(BoostQuery.class));
assertThat(fieldMappers.getMapper("f_field").fieldType().termQuery("0", context), instanceOf(BoostQuery.class));
assertThat(fieldMappers.getMapper("date_field").fieldType().termQuery("0", context), instanceOf(BoostQuery.class));
ParsedDocument doc = mapper.parse("test", "type", "1", XContentFactory.jsonBuilder().startObject()
.field("s_field", "s_value")
.field("l_field", 1L)
.field("i_field", 1)
.field("sh_field", 1)
.field("b_field", 1)
.field("d_field", 1)
.field("f_field", 1)
.field("date_field", "20100101")
.endObject().bytes());
assertThat(doc.rootDoc().getField("s_field").boost(), equalTo(1f));
assertThat(doc.rootDoc().getField("l_field").boost(), equalTo(1f));
assertThat(doc.rootDoc().getField("i_field").boost(), equalTo(1f));
assertThat(doc.rootDoc().getField("sh_field").boost(), equalTo(1f));
assertThat(doc.rootDoc().getField("b_field").boost(), equalTo(1f));
assertThat(doc.rootDoc().getField("d_field").boost(), equalTo(1f));
assertThat(doc.rootDoc().getField("f_field").boost(), equalTo(1f));
assertThat(doc.rootDoc().getField("date_field").boost(), equalTo(1f));
}
}
}

View File

@ -36,6 +36,7 @@ import org.elasticsearch.test.InternalSettingsPlugin;
import java.util.Collection;

import static org.hamcrest.Matchers.closeTo;
+import static org.hamcrest.Matchers.equalTo;

/**
 */

@ -98,6 +99,97 @@ public class FieldLevelBoostTests extends ESSingleNodeTestCase {
        assertThat((double) f.boost(), closeTo(9.0, 0.001));
    }
public void testBackCompatFieldLevelMappingBoost() throws Exception {
String mapping = XContentFactory.jsonBuilder().startObject().startObject("person").startObject("properties")
.startObject("str_field").field("type", "keyword").field("boost", "2.0").endObject()
.startObject("int_field").field("type", "integer").field("boost", "3.0").startObject("norms").field("enabled", true).endObject().endObject()
.startObject("byte_field").field("type", "byte").field("boost", "4.0").startObject("norms").field("enabled", true).endObject().endObject()
.startObject("date_field").field("type", "date").field("boost", "5.0").startObject("norms").field("enabled", true).endObject().endObject()
.startObject("double_field").field("type", "double").field("boost", "6.0").startObject("norms").field("enabled", true).endObject().endObject()
.startObject("float_field").field("type", "float").field("boost", "7.0").startObject("norms").field("enabled", true).endObject().endObject()
.startObject("long_field").field("type", "long").field("boost", "8.0").startObject("norms").field("enabled", true).endObject().endObject()
.startObject("short_field").field("type", "short").field("boost", "9.0").startObject("norms").field("enabled", true).endObject().endObject()
.string();
{
DocumentMapper docMapper = createIndex("test", BW_SETTINGS).mapperService().documentMapperParser().parse("person", new CompressedXContent(mapping));
BytesReference json = XContentFactory.jsonBuilder().startObject()
.field("str_field", "some name")
.field("int_field", 10)
.field("byte_field", 20)
.field("date_field", "2012-01-10")
.field("double_field", 30.0)
.field("float_field", 40.0)
.field("long_field", 50)
.field("short_field", 60)
.bytes();
Document doc = docMapper.parse("test", "person", "1", json).rootDoc();
IndexableField f = doc.getField("str_field");
assertThat((double) f.boost(), closeTo(2.0, 0.001));
f = doc.getField("int_field");
assertThat((double) f.boost(), closeTo(3.0, 0.001));
f = doc.getField("byte_field");
assertThat((double) f.boost(), closeTo(4.0, 0.001));
f = doc.getField("date_field");
assertThat((double) f.boost(), closeTo(5.0, 0.001));
f = doc.getField("double_field");
assertThat((double) f.boost(), closeTo(6.0, 0.001));
f = doc.getField("float_field");
assertThat((double) f.boost(), closeTo(7.0, 0.001));
f = doc.getField("long_field");
assertThat((double) f.boost(), closeTo(8.0, 0.001));
f = doc.getField("short_field");
assertThat((double) f.boost(), closeTo(9.0, 0.001));
}
{
DocumentMapper docMapper = createIndex("test2").mapperService().documentMapperParser().parse("person", new CompressedXContent(mapping));
BytesReference json = XContentFactory.jsonBuilder().startObject()
.field("str_field", "some name")
.field("int_field", 10)
.field("byte_field", 20)
.field("date_field", "2012-01-10")
.field("double_field", 30.0)
.field("float_field", 40.0)
.field("long_field", 50)
.field("short_field", 60)
.bytes();
Document doc = docMapper.parse("test", "person", "1", json).rootDoc();
IndexableField f = doc.getField("str_field");
assertThat(f.boost(), equalTo(1f));
f = doc.getField("int_field");
assertThat(f.boost(), equalTo(1f));
f = doc.getField("byte_field");
assertThat(f.boost(), equalTo(1f));
f = doc.getField("date_field");
assertThat(f.boost(), equalTo(1f));
f = doc.getField("double_field");
assertThat(f.boost(), equalTo(1f));
f = doc.getField("float_field");
assertThat(f.boost(), equalTo(1f));
f = doc.getField("long_field");
assertThat(f.boost(), equalTo(1f));
f = doc.getField("short_field");
assertThat(f.boost(), equalTo(1f));
}
}
public void testBackCompatInvalidFieldLevelBoost() throws Exception { public void testBackCompatInvalidFieldLevelBoost() throws Exception {
String mapping = XContentFactory.jsonBuilder().startObject().startObject("person").startObject("properties") String mapping = XContentFactory.jsonBuilder().startObject().startObject("person").startObject("properties")
.startObject("str_field").field("type", "string").endObject() .startObject("str_field").field("type", "string").endObject()

View File

@ -1,10 +1,10 @@
[[ingest-attachment]]
=== Ingest Attachment Processor Plugin

-The ingest attachment plugin lets Elasticsearch extract file attachments in common formats (such as PPT, XLS, PDF)
+The ingest attachment plugin lets Elasticsearch extract file attachments in common formats (such as PPT, XLS, and PDF) by
using the Apache text extraction library http://lucene.apache.org/tika/[Tika].

-It can be used as replacement for the mapper attachment plugin.
+You can use the ingest attachment plugin as a replacement for the mapper attachment plugin.

The source field must be a base64 encoded binary.

@ -16,7 +16,7 @@ The source field must be a base64 encoded binary.
| `source_field` | yes | - | The field to get the base64 encoded field from
| `target_field` | no | attachment | The field that will hold the attachment information
| `indexed_chars` | no | 100000 | The number of chars being used for extraction to prevent huge fields. Use `-1` for no limit.
-| `fields` | no | all | Properties to select to be stored, can be `content`, `title`, `name`, `author`, `keywords`, `date`, `content_type`, `content_length`, `language`
+| `fields` | no | all | Properties to select to be stored. Can be `content`, `title`, `name`, `author`, `keywords`, `date`, `content_type`, `content_length`, `language`
|======

[source,js]

View File

@ -7,7 +7,7 @@ This processor adds this information by default under the `geoip` field.
The ingest-geoip plugin ships by default with the GeoLite2 City and GeoLite2 Country geoip2 databases from Maxmind made available
under the CCA-ShareAlike 3.0 license. For more details see, http://dev.maxmind.com/geoip/geoip2/geolite2/

-The GeoIP processor can run with other geoip2 databases from Maxmind. The files must be copied into the geoip config directory
+The GeoIP processor can run with other geoip2 databases from Maxmind. The files must be copied into the geoip config directory,
and the `database_file` option should be used to specify the filename of the custom database. The geoip config directory
is located at `$ES_HOME/config/ingest/geoip` and holds the shipped databases too.

@ -24,13 +24,13 @@ is located at `$ES_HOME/config/ingest/geoip` and holds the shipped databases too
*Depends on what is available in `database_field`:

-* If the GeoLite2 City database is used then the following fields may be added under the `target_field`: `ip`,
+* If the GeoLite2 City database is used, then the following fields may be added under the `target_field`: `ip`,
`country_iso_code`, `country_name`, `continent_name`, `region_name`, `city_name`, `timezone`, `latitude`, `longitude`
and `location`. The fields actually added depend on what has been found and which fields were configured in `fields`.
-* If the GeoLite2 Country database is used then the following fields may be added under the `target_field`: `ip`,
-`country_iso_code`, `country_name` and `continent_name`.The fields actually added depend on what has been found and which fields were configured in `fields`.
+* If the GeoLite2 Country database is used, then the following fields may be added under the `target_field`: `ip`,
+`country_iso_code`, `country_name` and `continent_name`. The fields actually added depend on what has been found and which fields were configured in `fields`.

-An example that uses the default city database and adds the geographical information to the `geoip` field based on the `ip` field:
+Here is an example that uses the default city database and adds the geographical information to the `geoip` field based on the `ip` field:

[source,js]
--------------------------------------------------

@ -46,7 +46,7 @@ An example that uses the default city database and adds the geographical informa
}
--------------------------------------------------

-An example that uses the default country database and add the geographical information to the `geo` field based on the `ip` field`:
+Here is an example that uses the default country database and adds the geographical information to the `geo` field based on the `ip` field`:

[source,js]
--------------------------------------------------

View File

@ -268,7 +268,7 @@ POST /_reindex
      "company": "cat"
    }
  }
-  }
+  },
  "index": {
    "index": "dest",
    "routing": "=cat"

@ -277,6 +277,23 @@
--------------------------------------------------
// AUTOSENSE
Reindex can also use the <<ingest>> feature by specifying a
`pipeline` like this:
[source,js]
--------------------------------------------------
POST /_reindex
{
"source": {
"index": "source"
},
"index": {
"index": "dest",
"pipeline": "some_ingest_pipeline"
}
}
--------------------------------------------------
// AUTOSENSE
[float] [float]
=== URL Parameters === URL Parameters

View File

@ -138,6 +138,15 @@ POST /twitter/_update_by_query?scroll_size=1000
--------------------------------------------------
// AUTOSENSE
`_update_by_query` can also use the <<ingest>> feature by
specifying a `pipeline` like this:
[source,js]
--------------------------------------------------
POST /twitter/_update_by_query?pipeline=some_ingest_pipeline
--------------------------------------------------
// AUTOSENSE
[float] [float]
=== URL Parameters === URL Parameters

View File

@ -3,22 +3,25 @@
[partintro]
--
-Ingest node can be used to pre-process documents before the actual indexing takes place.
+You can use ingest node to pre-process documents before the actual indexing takes place.
This pre-processing happens by an ingest node that intercepts bulk and index requests, applies the
-transformations and then passes the documents back to the index or bulk APIs.
+transformations, and then passes the documents back to the index or bulk APIs.

-Ingest node is enabled by default. In order to disable ingest the following
-setting should be configured in the elasticsearch.yml file:
+You can enable ingest on any node or even have dedicated ingest nodes. Ingest is enabled by default
+on all nodes. To disable ingest on a node, configure the following setting in the `elasticsearch.yml` file:

[source,yaml]
--------------------------------------------------
node.ingest: false
--------------------------------------------------

-It is possible to enable ingest on any node or have dedicated ingest nodes.
+To pre-process documents before indexing, you <<pipe-line,define a pipeline>> that specifies
+a series of <<ingest-processors,processors>>. Each processor transforms the document in some way.
+For example, you may have a pipeline that consists of one processor that removes a field from
+the document followed by another processor that renames a field.

-In order to pre-process document before indexing the `pipeline` parameter should be used
-on an index or bulk request to tell Ingest what pipeline is going to be used.
+To use a pipeline, you simply specify the `pipeline` parameter on an index or bulk request to
+tell the ingest node which pipeline to use. For example:

[source,js]
--------------------------------------------------

@ -29,6 +32,8 @@ PUT /my-index/my-type/my-id?pipeline=my_pipeline_id
--------------------------------------------------
// AUTOSENSE

+See <<ingest-apis,Ingest APIs>> for more information about creating, adding, and deleting pipelines.
--
include::ingest/ingest-node.asciidoc[]

View File

@ -1,8 +1,10 @@
[[pipe-line]]
== Pipeline Definition

-A pipeline is a definition of a series of processors that are to be
-executed in the same sequential order as they are declared.
+A pipeline is a definition of a series of <<ingest-processors, processors>> that are to be executed
+in the same order as they are declared. A pipeline consists of two main fields: a `description`
+and a list of `processors`:

[source,js]
--------------------------------------------------
{

@ -11,17 +13,26 @@ executed in the same sequential order as they are declared.
}
--------------------------------------------------

The `description` is a special field to store a helpful description of
-what the pipeline attempts to achieve.
+what the pipeline does.
The `processors` parameter defines a list of processors to be executed in
order.
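
As an illustration of the two fields, a minimal pipeline might look like the sketch below (the processor and its options are assumptions for illustration, not taken from the hunk above):

[source,js]
--------------------------------------------------
{
  "description" : "describe pipeline",
  "processors" : [
    {
      "set" : {
        "field": "foo",
        "value": "bar"
      }
    }
  ]
}
--------------------------------------------------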
+[[ingest-apis]]
== Ingest APIs

-=== Put pipeline API
+The following ingest APIs are available for managing pipelines:

-The put pipeline api adds pipelines and updates existing pipelines in the cluster.
+* <<put-pipeline-api>> to add or update a pipeline
+* <<get-pipeline-api>> to return a specific pipeline
+* <<delete-pipeline-api>> to delete a pipeline
+* <<simulate-pipeline-api>> to simulate a call to a pipeline
+
+[[put-pipeline-api]]
+=== Put Pipeline API
+The put pipeline API adds pipelines and updates existing pipelines in the cluster.

[source,js]
--------------------------------------------------
@ -40,12 +51,13 @@ PUT _ingest/pipeline/my-pipeline-id
--------------------------------------------------
// AUTOSENSE

-NOTE: The put pipeline api also instructs all ingest nodes to reload their in-memory representation of pipelines, so that
-pipeline changes take immediately in effect.
+NOTE: The put pipeline API also instructs all ingest nodes to reload their in-memory representation of pipelines, so that
+pipeline changes take effect immediately.

-=== Get pipeline API
+[[get-pipeline-api]]
+=== Get Pipeline API

-The get pipeline api returns pipelines based on id. This api always returns a local reference of the pipeline.
+The get pipeline API returns pipelines based on ID. This API always returns a local reference of the pipeline.

[source,js]
--------------------------------------------------

@ -75,13 +87,14 @@ Example response:
}
--------------------------------------------------

-For each returned pipeline the source and the version is returned.
-The version is useful for knowing what version of the pipeline the node has.
-Multiple ids can be provided at the same time. Also wildcards are supported.
+For each returned pipeline, the source and the version are returned.
+The version is useful for knowing which version of the pipeline the node has.
+You can specify multiple IDs to return more than one pipeline. Wildcards are also supported.

-=== Delete pipeline API
+[[delete-pipeline-api]]
+=== Delete Pipeline API

-The delete pipeline api deletes pipelines by id.
+The delete pipeline API deletes pipelines by ID.

[source,js]
--------------------------------------------------
@ -89,16 +102,18 @@ DELETE _ingest/pipeline/my-pipeline-id
-------------------------------------------------- --------------------------------------------------
// AUTOSENSE // AUTOSENSE
=== Simulate pipeline API [[simulate-pipeline-api]]
=== Simulate Pipeline API
The simulate pipeline api executes a specific pipeline against The simulate pipeline API executes a specific pipeline against
the set of documents provided in the body of the request. the set of documents provided in the body of the request.
A simulate request may call upon an existing pipeline to be executed You can either specify an existing pipeline to execute
against the provided documents, or supply a pipeline definition in against the provided documents, or supply a pipeline definition in
the body of the request. the body of the request.
Here is the structure of a simulate request with a provided pipeline: Here is the structure of a simulate request with a pipeline definition provided
in the body of the request:
[source,js] [source,js]
-------------------------------------------------- --------------------------------------------------
@ -115,7 +130,7 @@ POST _ingest/pipeline/_simulate
} }
-------------------------------------------------- --------------------------------------------------
Here is the structure of a simulate request against a pre-existing pipeline: Here is the structure of a simulate request against an existing pipeline:
[source,js]
--------------------------------------------------
@ -130,7 +145,8 @@ POST _ingest/pipeline/my-pipeline-id/_simulate
--------------------------------------------------

Here is an example of a simulate request with a pipeline defined in the request
and its response:
[source,js]
--------------------------------------------------
@ -170,7 +186,7 @@ POST _ingest/pipeline/_simulate
--------------------------------------------------
// AUTOSENSE

Response:
[source,js]
--------------------------------------------------
@ -216,13 +232,14 @@ response:
}
--------------------------------------------------

[[ingest-verbose-param]]
==== Viewing Verbose Results

You can use the simulate pipeline API to see how each processor affects the ingest document
as it passes through the pipeline. To see the intermediate results of
each processor in the simulate request, you can add the `verbose` parameter
to the request.

Here is an example of a verbose request and its response:
[source,js]
--------------------------------------------------
@ -268,7 +285,7 @@ POST _ingest/pipeline/_simulate?verbose
--------------------------------------------------
// AUTOSENSE

Response:
[source,js]
--------------------------------------------------
@ -364,12 +381,16 @@ response:
}
--------------------------------------------------

[[accessing-data-in-pipelines]]
== Accessing Data in Pipelines

The processors in a pipeline have read and write access to documents that pass through the pipeline.
The processors can access fields in the source of a document and the document's metadata fields.

[float]
[[accessing-source-fields]]
=== Accessing Fields in the Source

Accessing a field in the source is straightforward. You simply refer to fields by
their name. For example:
[source,js]
@ -382,7 +403,7 @@ their name. For example:
}
--------------------------------------------------

On top of this, fields from the source are always accessible via the `_source` prefix:
[source,js]
--------------------------------------------------
@ -394,11 +415,14 @@ On top of this fields from the source are always accessible via the `_source` pr
}
--------------------------------------------------

[float]
[[accessing-metadata-fields]]
=== Accessing Metadata Fields

You can access metadata fields in the same way that you access fields in the source. This
is possible because Elasticsearch doesn't allow fields in the source that have the
same name as metadata fields.

The following example sets the `_id` metadata field of a document to `1`:
[source,js]
--------------------------------------------------
@ -411,15 +435,20 @@ The following example sets the id of a document to `1`:
--------------------------------------------------

The following metadata fields are accessible by a processor: `_index`, `_type`, `_id`, `_routing`, `_parent`,
`_timestamp`, and `_ttl`.

[float]
[[accessing-ingest-metadata]]
=== Accessing Ingest Metadata Fields

Beyond metadata fields and source fields, ingest also adds ingest metadata to the documents that it processes.
These metadata properties are accessible under the `_ingest` key. Currently ingest adds the ingest timestamp
under the `_ingest.timestamp` key of the ingest metadata. The ingest timestamp is the time when Elasticsearch
received the index or bulk request to pre-process the document.

Any processor can add ingest-related metadata during document processing. Ingest metadata is transient
and is lost after a document has been processed by the pipeline. Therefore, ingest metadata won't be indexed.

The following example adds a field with the name `received`. The value is the ingest timestamp:
[source,js]
--------------------------------------------------
@ -431,15 +460,18 @@ The following example adds a field with the name `received` and the value is the
}
--------------------------------------------------

Unlike Elasticsearch metadata fields, the ingest metadata field name `_ingest` can be used as a valid field name
in the source of a document. Use `_source._ingest` to refer to the field in the source document. Otherwise, `_ingest`
will be interpreted as an ingest metadata field.

[float]
[[accessing-template-fields]]
=== Accessing Fields and Metafields in Templates

A number of processor settings also support templating. Settings that support templating can have zero or more
template snippets. A template snippet begins with `{{` and ends with `}}`.
Accessing fields and metafields in templates is exactly the same as via regular processor field settings.

The following example adds a field named `field_c`. Its value is a concatenation of
the values of `field_a` and `field_b`.
[source,js]
@ -452,8 +484,8 @@ the values of `field_a` and `field_b`.
}
--------------------------------------------------
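
A minimal sketch of such a processor definition, assuming the `set` processor and the two
source fields described above:

[source,js]
--------------------------------------------------
{
  "set": {
    "field": "field_c",
    "value": "{{field_a}} {{field_b}}"
  }
}
--------------------------------------------------
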
The following example uses the value of the `geoip.country_iso_code` field in the source
to set the index that the document will be indexed into:
[source,js]
--------------------------------------------------
@ -466,25 +498,25 @@ to depends on the field in the source with name `geoip.country_iso_code`.
--------------------------------------------------
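
A sketch of one way to express this, using the `set` processor to write a template snippet
into the `_index` metadata field:

[source,js]
--------------------------------------------------
{
  "set": {
    "field": "_index",
    "value": "{{geoip.country_iso_code}}"
  }
}
--------------------------------------------------
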
[[handling-failure-in-pipelines]]
== Handling Failures in Pipelines

In its simplest use case, a pipeline defines a list of processors that
are executed sequentially, and processing halts at the first exception. This
behavior may not be desirable when failures are expected. For example, you may have logs
that don't match the specified grok expression. Instead of halting execution, you may
want to index such documents into a separate index.

To enable this behavior, you can use the `on_failure` parameter. The `on_failure` parameter
defines a list of processors to be executed immediately following the failed processor.
You can specify this parameter at the pipeline level, as well as at the processor
level. If a processor specifies an `on_failure` configuration, whether
it is empty or not, any exceptions that are thrown by the processor are caught, and the
pipeline continues executing the remaining processors. Because you can define further processors
within the scope of an `on_failure` statement, you can nest failure handling.

The following example defines a pipeline that renames the `foo` field in
the processed document to `bar`. If the document does not contain the `foo` field, the processor
attaches an error message to the document for later analysis within
Elasticsearch.
[source,js]
@ -510,8 +542,8 @@ Elasticsearch.
}
--------------------------------------------------

The following example defines an `on_failure` block on a whole pipeline to change
the index to which failed documents get sent.
[source,js]
--------------------------------------------------
@ -529,15 +561,18 @@ the index for which failed documents get sent.
}
--------------------------------------------------
[float]
[[accessing-error-metadata]]
=== Accessing Error Metadata From Processors Handling Exceptions

You may want to retrieve the actual error message that was thrown
by a failed processor. To do so you can access metadata fields called
`on_failure_message`, `on_failure_processor_type`, and `on_failure_processor_tag`. These fields are only accessible
from within the context of an `on_failure` block.

Here is an updated version of the earlier example. Instead of setting the error message manually,
the example leverages the `on_failure_message` metadata field to provide the error message.
[source,js]
--------------------------------------------------
@ -562,6 +597,7 @@ of manually setting it.
}
--------------------------------------------------
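
For instance, a minimal sketch of such an `on_failure` block (the processor and field names
are illustrative) could set an `error` field from the captured message:

[source,js]
--------------------------------------------------
{
  "on_failure" : [
    {
      "set" : {
        "field" : "error",
        "value" : "{{ _ingest.on_failure_message }}"
      }
    }
  ]
}
--------------------------------------------------
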
[[ingest-processors]]
== Processors

All processors are defined in the following way within a pipeline definition:
@ -575,19 +611,20 @@ All processors are defined in the following way within a pipeline definition:
}
--------------------------------------------------

Each processor defines its own configuration parameters, but all processors have
the ability to declare `tag` and `on_failure` fields. These fields are optional.

A `tag` is simply a string identifier of the specific instantiation of a certain
processor in a pipeline. The `tag` field does not affect the processor's behavior,
but is very useful for bookkeeping and tracing errors to specific processors.

See <<handling-failure-in-pipelines>> to learn more about the `on_failure` field and error handling in pipelines.
[[append-processor]]
=== Append Processor

Appends one or more values to an existing array if the field already exists and it is an array.
Converts a scalar to an array and appends one or more values to it if the field exists and it is a scalar.
Creates an array containing the provided values if the field doesn't exist.
Accepts a single value or an array of values.

[[append-options]]
@ -609,14 +646,15 @@ Accepts a single value or an array of values.
}
--------------------------------------------------
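
A minimal sketch of an append processor configuration (the field and values are illustrative):

[source,js]
--------------------------------------------------
{
  "append": {
    "field": "tags",
    "value": ["production", "frontend"]
  }
}
--------------------------------------------------
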
[[convert-processor]]
=== Convert Processor

Converts an existing field's value to a different type, such as converting a string to an integer.
If the field value is an array, all members will be converted.

The supported types include: `integer`, `float`, `string`, and `boolean`.

Specifying `boolean` will set the field to true if its string value is equal to `true` (ignoring case), to
false if its string value is equal to `false` (ignoring case), or it will throw an exception otherwise.

[[convert-options]]
.Convert Options
@ -637,12 +675,14 @@ false if its string value is equal to `false` (ignore case) and it will throw ex
}
--------------------------------------------------
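
For example, a sketch of a convert processor that coerces an illustrative `bytes` field to an integer:

[source,js]
--------------------------------------------------
{
  "convert": {
    "field" : "bytes",
    "type": "integer"
  }
}
--------------------------------------------------
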
[[date-processor]]
=== Date Processor

Parses dates from fields, and then uses the date or timestamp as the timestamp for the document.

By default, the date processor adds the parsed date as a new field called `@timestamp`. You can specify a
different field by setting the `target_field` configuration parameter. Multiple date formats are supported
as part of the same date processor definition. They will be used sequentially to attempt parsing the date field,
in the same order they were defined as part of the processor definition.

[[date-options]]
.Date options
@ -651,12 +691,12 @@ sequentially to attempt parsing the date field, in the same order they were defi
| Name | Required | Default | Description
| `match_field` | yes | - | The field to get the date from.
| `target_field` | no | @timestamp | The field that will hold the parsed date.
| `match_formats` | yes | - | An array of the expected date formats. Can be a Joda pattern or one of the following formats: ISO8601, UNIX, UNIX_MS, or TAI64N.
| `timezone` | no | UTC | The timezone to use when parsing the date.
| `locale` | no | ENGLISH | The locale to use when parsing the date, relevant when parsing month names or week days.
|======
Here is an example that adds the parsed date to the `timestamp` field based on the `initial_date` field:
[source,js]
--------------------------------------------------
@ -675,9 +715,10 @@ An example that adds the parsed date to the `timestamp` field based on the `init
}
--------------------------------------------------
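
A sketch of such a date processor, using the option names from the table above (the date
format and timezone are illustrative):

[source,js]
--------------------------------------------------
{
  "date" : {
    "match_field" : "initial_date",
    "target_field" : "timestamp",
    "match_formats" : ["dd/MM/yyyy hh:mm:ss"],
    "timezone" : "Europe/Amsterdam"
  }
}
--------------------------------------------------
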
[[fail-processor]]
=== Fail Processor

Raises an exception. This is useful for when
you expect a pipeline to fail and want to relay a specific message
to the requester.

[[fail-options]]
@ -697,17 +738,20 @@ to the requester.
}
--------------------------------------------------
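
A minimal sketch of a fail processor, assuming a `message` option as its only required setting:

[source,js]
--------------------------------------------------
{
  "fail": {
    "message": "an error message"
  }
}
--------------------------------------------------
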
[[foreach-processor]]
=== Foreach Processor

Processes elements in an array of unknown length.

All processors can operate on elements inside an array, but if all elements of an array need to
be processed in the same way, defining a processor for each element becomes cumbersome and tricky
because it is likely that the number of elements in an array is unknown. For this reason the `foreach`
processor exists. By specifying the field holding array elements and a list of processors that
define what should happen to each element, array fields can easily be preprocessed.

Processors inside the foreach processor work in a different context, and the only valid top-level
field is `_value`, which holds the array element value. Under this field other fields may exist.

If the `foreach` processor fails to process an element inside the array, and no `on_failure` processor has been specified,
then it aborts the execution and leaves the array unmodified.

[[foreach-options]]
@ -755,7 +799,7 @@ Then the document will look like this after preprocessing:
}
--------------------------------------------------

Let's take a look at another example:
[source,js]
--------------------------------------------------
@ -773,8 +817,8 @@ Lets take a look at another example:
}
--------------------------------------------------

In this case, the `id` field needs to be removed,
so the following `foreach` processor is used:
[source,js]
--------------------------------------------------
@ -808,12 +852,12 @@ After preprocessing the result is:
}
--------------------------------------------------

As for any processor, you can define `on_failure` processors
in processors that are wrapped inside the `foreach` processor.

For example, the `id` field may not exist on all person objects.
Instead of failing the index request, you can use an `on_failure`
block to send the document to the 'failure_index' index for later inspection:
[source,js]
--------------------------------------------------
@ -839,14 +883,15 @@ the 'failure_index' index for later inspection:
}
--------------------------------------------------

In this example, if the `remove` processor does fail, then
the array elements that have been processed thus far will
be updated.
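
A sketch of the scenario above, assuming a `processors` list option on `foreach` and an
illustrative `persons` array whose elements carry an `id` field:

[source,js]
--------------------------------------------------
{
  "foreach" : {
    "field" : "persons",
    "processors" : [
      {
        "remove" : {
          "field" : "_value.id",
          "on_failure" : [
            {
              "set" : {
                "field" : "_index",
                "value" : "failure_index"
              }
            }
          ]
        }
      }
    ]
  }
}
--------------------------------------------------
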
[[grok-processor]]
=== Grok Processor

Extracts structured fields out of a single text field within a document. You choose which field to
extract matched fields from, as well as the grok pattern you expect will match. A grok pattern is like a regular
expression that supports aliased expressions that can be reused.

This tool is perfect for syslog logs, apache and other webserver logs, mysql logs, and in general, any log format
@ -858,6 +903,7 @@ Here, you can add your own custom grok pattern files with custom grok expression
If you need help building patterns to match your logs, you will find the <http://grokdebug.herokuapp.com> and
<http://grokconstructor.appspot.com/> applications quite useful!
[[grok-basics]]
==== Grok Basics

Grok sits on top of regular expressions, so any regular expressions are valid in grok as well.
@ -867,7 +913,7 @@ https://github.com/kkos/oniguruma/blob/master/doc/RE[on the Onigiruma site].
Grok works by leveraging this regular expression language to allow naming existing patterns and combining them into more
complex patterns that match your fields.

The syntax for reusing a grok pattern comes in three forms: `%{SYNTAX:SEMANTIC}`, `%{SYNTAX}`, `%{SYNTAX:SEMANTIC:TYPE}`.

The `SYNTAX` is the name of the pattern that will match your text. For example, `3.44` will be matched by the `NUMBER`
pattern and `55.3.244.1` will be matched by the `IP` pattern. The syntax is how you match. `NUMBER` and `IP` are both
@ -879,15 +925,14 @@ the `client` making a request.
The `TYPE` is the type you wish to cast your named field. `int` and `float` are currently the only types supported for coercion.

For example, you might want to match the following text:

[source,js]
--------------------------------------------------
3.44 55.3.244.1
--------------------------------------------------

You may know that the message in the example is a number followed by an IP address. You can match this text by using the following
Grok expression.

[source,js]
@ -895,9 +940,10 @@ Grok expression.
%{NUMBER:duration} %{IP:client}
--------------------------------------------------
[[custom-patterns]]
==== Custom Patterns and Pattern Files

The Grok processor comes pre-packaged with a base set of pattern files. These patterns may not always have
what you are looking for. These pattern files have a very basic format. Each line describes a named pattern with
the following format:
@ -906,11 +952,11 @@ the following format:
NAME ' '+ PATTERN '\n'
--------------------------------------------------

You can add new patterns to an existing file, or add your own file in the patterns directory here: `$ES_HOME/config/ingest/grok/patterns`.
Ingest node picks up files in this directory and loads the patterns into the grok processor's known patterns.
These patterns are loaded at startup, so you need to restart your ingest node if you want to update these files.

Here is an example snippet of pattern definitions found in the `grok-patterns` patterns file:
[source,js]
--------------------------------------------------
@ -921,7 +967,8 @@ SECOND (?:(?:[0-5]?[0-9]|60)(?:[:.,][0-9]+)?)
TIME (?!<[0-9])%{HOUR}:%{MINUTE}(?::%{SECOND})(?![0-9])
--------------------------------------------------
[[using-grok]]
==== Using the Grok Processor in a Pipeline

[[grok-options]]
.Grok Options
@ -943,14 +990,14 @@ a document.
}
--------------------------------------------------

The pattern for this could be:
[source,js]
--------------------------------------------------
%{IP:client} %{WORD:method} %{URIPATHPARAM:request} %{NUMBER:bytes} %{NUMBER:duration}
--------------------------------------------------
Here is an example pipeline for processing the above document by using Grok:
[source,js]
--------------------------------------------------
@ -981,7 +1028,7 @@ This pipeline will insert these named captures as new fields within the document
}
--------------------------------------------------
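
A sketch of such a pipeline, assuming `field` and `pattern` options on the grok processor
and the access-log pattern shown above:

[source,js]
--------------------------------------------------
{
  "description" : "parse a single access-log style line",
  "processors": [
    {
      "grok": {
        "field": "message",
        "pattern": "%{IP:client} %{WORD:method} %{URIPATHPARAM:request} %{NUMBER:bytes} %{NUMBER:duration}"
      }
    }
  ]
}
--------------------------------------------------
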
Here is an example of a pipeline specifying custom pattern definitions:
[source,js]
--------------------------------------------------
@ -1002,7 +1049,8 @@ An example of a pipeline specifying custom pattern definitions:
}
--------------------------------------------------

[[gsub-processor]]
=== Gsub Processor

Converts a string field by applying a regular expression and a replacement.
If the field is not a string, the processor will throw an exception.
@ -1011,9 +1059,9 @@ If the field is not a string, the processor will throw an exception.
[options="header"] [options="header"]
|====== |======
| Name | Required | Default | Description | Name | Required | Default | Description
| `field` | yes | - | The field apply the replacement for | `field` | yes | - | The field to apply the replacement to
| `pattern` | yes | - | The pattern to be replaced | `pattern` | yes | - | The pattern to be replaced
| `replacement` | yes | - | The string to replace the matching patterns with. | `replacement` | yes | - | The string to replace the matching patterns with
|====== |======
[source,js]
@ -1027,9 +1075,10 @@ If the field is not a string, the processor will throw an exception.
}
--------------------------------------------------
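
For example, a sketch of a gsub processor that replaces dashes with underscores in an
illustrative `phone` field:

[source,js]
--------------------------------------------------
{
  "gsub": {
    "field": "phone",
    "pattern": "-",
    "replacement": "_"
  }
}
--------------------------------------------------
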
[[join-processor]]
=== Join Processor

Joins each element of an array into a single string using a separator character between each element.
Throws an error when the field is not an array.

[[join-options]]
.Join Options
@ -1050,7 +1099,8 @@ Throws error when the field is not an array.
}
--------------------------------------------------
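
A minimal sketch, assuming a `separator` option alongside `field` (the field name and
separator are illustrative):

[source,js]
--------------------------------------------------
{
  "join": {
    "field": "tags",
    "separator": ", "
  }
}
--------------------------------------------------
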
[[lowercase-processor]]
=== Lowercase Processor

Converts a string to its lowercase equivalent.
[[lowercase-options]] [[lowercase-options]]
@ -1058,7 +1108,7 @@ Converts a string to its lowercase equivalent.
[options="header"] [options="header"]
|====== |======
| Name | Required | Default | Description | Name | Required | Default | Description
| `field` | yes | - | The field to lowercase | `field` | yes | - | The field to make lowercase
|====== |======
[source,js]
@ -1070,8 +1120,9 @@ Converts a string to its lowercase equivalent.
}
--------------------------------------------------
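
For example, a minimal sketch with an illustrative field name:

[source,js]
--------------------------------------------------
{
  "lowercase": {
    "field": "hostname"
  }
}
--------------------------------------------------
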
[[remove-processor]]
=== Remove Processor

Removes an existing field. If the field doesn't exist, an exception will be thrown.

[[remove-options]]
.Remove Options
@ -1090,9 +1141,9 @@ Removes an existing field. If the field doesn't exist, an exception will be thro
}
--------------------------------------------------
[[rename-processor]]
=== Rename Processor

Renames an existing field. If the field doesn't exist or the new name is already used, an exception will be thrown.

[[rename-options]]
.Rename Options
@ -1113,7 +1164,8 @@ name must not exist.
}
--------------------------------------------------
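
A sketch of a rename processor; the target option is assumed to be named `to` here, so
check the options table above for the authoritative option names:

[source,js]
--------------------------------------------------
{
  "rename": {
    "field": "foo",
    "to": "bar"
  }
}
--------------------------------------------------
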
[[set-processor]]
=== Set Processor

Sets one field and associates it with the specified value. If the field already exists,
its value will be replaced with the provided one.
@ -1136,8 +1188,9 @@ its value will be replaced with the provided one.
}
--------------------------------------------------
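
A minimal sketch of a set processor (the field name and value are illustrative):

[source,js]
--------------------------------------------------
{
  "set": {
    "field": "environment",
    "value": "production"
  }
}
--------------------------------------------------
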
[[split-processor]]
=== Split Processor

Splits a field into an array using a separator character. Only works on string fields.

[[split-options]]
.Split Options
@ -1156,8 +1209,11 @@ Split a field to an array using a separator character. Only works on string fiel
}
--------------------------------------------------
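
A minimal sketch, assuming a `separator` option alongside `field` (the field name and
separator are illustrative):

[source,js]
--------------------------------------------------
{
  "split": {
    "field": "message",
    "separator": ","
  }
}
--------------------------------------------------
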
[[trim-processor]]
=== Trim Processor

Trims whitespace from a field.

NOTE: This only works on leading and trailing whitespace.
[[trim-options]]
.Trim Options
@ -1176,7 +1232,8 @@ Trims whitespace from field. NOTE: this only works on leading and trailing white
}
--------------------------------------------------
[[uppercase-processor]]
=== Uppercase Processor

Converts a string to its uppercase equivalent.

[[uppercase-options]]
@ -1184,7 +1241,7 @@ Converts a string to its uppercase equivalent.
[options="header"] [options="header"]
|====== |======
| Name | Required | Default | Description | Name | Required | Default | Description
| `field` | yes | - | The field to uppercase | `field` | yes | - | The field to make uppercase
|====== |======
[source,js] [source,js]

View File

@ -161,7 +161,7 @@ Individual fields can be included or excluded from the `_all` field with the
[[all-field-and-boosting]]
==== Index boosting and the `_all` field

Individual fields can be _boosted_ at index time, with the <<mapping-boost,`boost`>>
parameter. The `_all` field takes these boosts into account:

[source,js]

View File

@ -8,7 +8,7 @@ parameters that are used by <<mapping-types,field mappings>>:
The following mapping parameters are common to some or all field datatypes:

* <<analyzer,`analyzer`>>
* <<mapping-boost,`boost`>>
* <<coerce,`coerce`>>
* <<copy-to,`copy_to`>>
* <<doc-values,`doc_values`>>

View File

@ -1,8 +1,8 @@
[[mapping-boost]]
=== `boost`

Individual fields can be _boosted_ automatically -- count more towards the relevance score
-- at query time, with the `boost` parameter as follows:

[source,js]
--------------------------------------------------
@ -28,10 +28,45 @@ PUT my_index
<1> Matches on the `title` field will have twice the weight as those on the <1> Matches on the `title` field will have twice the weight as those on the
`content` field, which has the default `boost` of `1.0`. `content` field, which has the default `boost` of `1.0`.
Note that a `title` field will usually be shorter than a `content` field. The NOTE: The boost is applied only for term queries (prefix, range and fuzzy queries are not _boosted_).
default relevance calculation takes field length into account, so a short
`title` field will have a higher natural boost than a long `content` field.
You can achieve the same effect by using the boost parameter directly in the query, for instance the following query (with field time boost):
[source,js]
--------------------------------------------------
{
"match" : {
"title": {
"query": "quick brown fox"
}
}
}
--------------------------------------------------
is equivalent to:
[source,js]
--------------------------------------------------
{
"match" : {
"title": {
"query": "quick brown fox",
"boost": 2
}
}
}
--------------------------------------------------
// AUTOSENSE
The boost is also applied when it is copied with the
value in the <<mapping-all-field,`_all`>> field. This means that, when
querying the `_all` field, words that originated from the `title` field will
have a higher score than words that originated in the `content` field.
This functionality comes at a cost: queries on the `_all` field are slower
when field boosting is used.
deprecated[5.0.0, index time boost is deprecated. Instead, the field mapping boost is applied at query time. For indices created before 5.0.0 the boost will still be applied at index time.]
[WARNING]
.Why index time boosting is a bad idea
==================================================
@ -48,12 +83,4 @@ We advise against using index time boosting for the following reasons:
byte. This reduces the resolution of the field length normalization factor
which can lead to lower quality relevance calculations.
==================================================

View File

@ -1,10 +1,8 @@
[[norms]]
=== `norms`

Norms store various normalization factors that are later used at query time
in order to compute the score of a document relative to a query.

Although useful for scoring, norms also require quite a lot of memory
(typically in the order of one byte per document per field in your index, even

View File

@ -91,9 +91,9 @@ The following parameters are accepted by `boolean` fields:
[horizontal]
<<mapping-boost,`boost`>>::

    Mapping field-level query time boosting. Accepts a floating point number, defaults
    to `1.0`.

<<doc-values,`doc_values`>>::

View File

@ -90,9 +90,9 @@ The following parameters are accepted by `date` fields:
[horizontal]
<<mapping-boost,`boost`>>::

    Mapping field-level query time boosting. Accepts a floating point number, defaults
    to `1.0`.

<<doc-values,`doc_values`>>::

View File

@ -47,9 +47,9 @@ The following parameters are accepted by `ip` fields:
[horizontal]
<<mapping-boost,`boost`>>::

    Mapping field-level query time boosting. Accepts a floating point number, defaults
    to `1.0`.

<<doc-values,`doc_values`>>::

View File

@ -45,9 +45,9 @@ The following parameters are accepted by numeric types:
    Try to convert strings to numbers and truncate fractions for integers.
    Accepts `true` (default) and `false`.

<<mapping-boost,`boost`>>::

    Mapping field-level query time boosting. Accepts a floating point number, defaults
    to `1.0`.

<<doc-values,`doc_values`>>::

View File

@ -75,9 +75,9 @@ The following parameters are accepted by `string` fields:
    Defaults to the default index analyzer, or the
    <<analysis-standard-analyzer,`standard` analyzer>>.

<<mapping-boost,`boost`>>::

    Mapping field-level query time boosting. Accepts a floating point number, defaults
    to `1.0`.

<<doc-values,`doc_values`>>::

View File

@ -68,9 +68,9 @@ The following parameters are accepted by `token_count` fields:
    value. Required. For best performance, use an analyzer without token
    filters.

<<mapping-boost,`boost`>>::

    Mapping field-level query time boosting. Accepts a floating point number, defaults
    to `1.0`.

<<doc-values,`doc_values`>>::

View File

@ -449,3 +449,8 @@ The docs for the `nested` field datatype have moved to <<nested>>.
Warmers have been removed. There have been significant improvements to the
index that make warmers not necessary anymore.
[role="exclude",id="index-boost"]
=== Index time boosting
The index time boost mapping has been replaced with query time boost (see <<mapping-boost>>).

View File

@ -60,8 +60,8 @@ public class RestReindexAction extends AbstractBaseReindexRestHandler<ReindexReq
    static {
        ObjectParser.Parser<SearchRequest, ReindexParseContext> sourceParser = (parser, search, context) -> {
            /*
             * Extract the parameters that we need from the source sent to the parser. We could do away with this hack when search source
             * has an ObjectParser.
             */
            Map<String, Object> source = parser.map();
            String[] indices = extractStringArray(source, "index");
@ -84,6 +84,7 @@ public class RestReindexAction extends AbstractBaseReindexRestHandler<ReindexReq
        destParser.declareString(IndexRequest::type, new ParseField("type"));
        destParser.declareString(IndexRequest::routing, new ParseField("routing"));
        destParser.declareString(IndexRequest::opType, new ParseField("opType"));
        destParser.declareString(IndexRequest::setPipeline, new ParseField("pipeline"));
        destParser.declareString((s, i) -> s.versionType(VersionType.fromString(i)), new ParseField("versionType"));
        // These exist just so the user can get a nice validation error:

View File

@ -105,8 +105,10 @@ public class RestUpdateByQueryAction extends
        parseCommon(internalRequest, request);

        internalRequest.setSize(internalRequest.getSearchRequest().source().size());
        internalRequest.setPipeline(request.param("pipeline"));
        internalRequest.getSearchRequest().source().size(request.paramAsInt("scroll_size", scrollSize));

        execute(request, internalRequest, channel);
    }
}

View File

@ -158,6 +158,7 @@ public class TransportReindexAction extends HandledTransportAction<ReindexReques
        index.timestamp(mainRequest.getDestination().timestamp());
        index.ttl(mainRequest.getDestination().ttl());
        index.contentType(mainRequest.getDestination().getContentType());
        index.setPipeline(mainRequest.getDestination().getPipeline());
        // OpType is synthesized from version so it is handled when we copy version above.

        return index;

View File

@ -90,6 +90,7 @@ public class TransportUpdateByQueryAction extends HandledTransportAction<UpdateB
        index.source(doc.sourceRef());
        index.versionType(VersionType.INTERNAL);
        index.version(doc.version());
        index.setPipeline(mainRequest.getPipeline());
        return index;
    }

View File

@ -26,6 +26,11 @@ import org.elasticsearch.action.search.SearchRequest;
 * locations or IDs.
 */
public class UpdateByQueryRequest extends AbstractBulkIndexByScrollRequest<UpdateByQueryRequest> {
    /**
     * Ingest pipeline to set on index requests made by this action.
     */
    private String pipeline;

    public UpdateByQueryRequest() {
    }
@ -33,6 +38,20 @@ public class UpdateByQueryRequest extends AbstractBulkIndexByScrollRequest<Updat
        super(search);
    }

    /**
     * Set the ingest pipeline to set on index requests made by this action.
     */
    public void setPipeline(String pipeline) {
        this.pipeline = pipeline;
    }

    /**
     * Ingest pipeline to set on index requests made by this action.
     */
    public String getPipeline() {
        return pipeline;
    }
    @Override
    protected UpdateByQueryRequest self() {
        return this;

View File

@ -0,0 +1,46 @@
---
"Modify a document":

  - do:
      ingest.put_pipeline:
        id: "test_ingest"
        body: >
          {
            "description": "tests reindex with ingest",
            "processors": [
              {
                "append" : {
                  "field" : "new_field",
                  "value": "cat"
                }
              }
            ]
          }

  - do:
      index:
        index: twitter
        type: tweet
        id: 1
        body: { "user": "kimchy" }
  - do:
      indices.refresh: {}

  - do:
      reindex:
        refresh: true
        body:
          source:
            index: twitter
          dest:
            index: new_twitter
            pipeline: test_ingest
  - match: {created: 1}
  - match: {noops: 0}

  - do:
      search:
        index: new_twitter
        body:
          query:
            match:
              new_field: cat

  - match: { hits.total: 1 }

View File

@ -0,0 +1,42 @@
---
"Update a document using update-by-query":

  - do:
      ingest.put_pipeline:
        id: "test_ingest"
        body: >
          {
            "description": "tests reindex with ingest",
            "processors": [
              {
                "append" : {
                  "field" : "new_field",
                  "value": "cat"
                }
              }
            ]
          }

  - do:
      index:
        index: twitter
        type: tweet
        id: 1
        body: { "user": "kimchy" }
  - do:
      indices.refresh: {}

  - do:
      update-by-query:
        index: twitter
        refresh: true
        pipeline: test_ingest
  - match: {updated: 1}
  - match: {noops: 0}

  - do:
      search:
        index: twitter
        body:
          query:
            match:
              new_field: cat

  - match: { hits.total: 1 }

View File

@ -19,12 +19,19 @@
package org.elasticsearch.mapper.attachments;
import org.elasticsearch.common.logging.DeprecationLogger;
import org.elasticsearch.common.logging.ESLogger;
import org.elasticsearch.common.logging.ESLoggerFactory;
import org.elasticsearch.common.settings.SettingsModule;
import org.elasticsearch.indices.IndicesModule;
import org.elasticsearch.plugins.Plugin;

public class MapperAttachmentsPlugin extends Plugin {
    private static ESLogger logger = ESLoggerFactory.getLogger("mapper.attachment");
    private static DeprecationLogger deprecationLogger = new DeprecationLogger(logger);

    @Override
    public String name() {
        return "mapper-attachments";
@ -36,6 +43,7 @@ public class MapperAttachmentsPlugin extends Plugin {
    }

    public void onModule(SettingsModule settingsModule) {
        deprecationLogger.deprecated("[mapper-attachments] plugin has been deprecated and will be replaced by [ingest-attachment] plugin.");
        settingsModule.registerSetting(AttachmentMapper.INDEX_ATTACHMENT_DETECT_LANGUAGE_SETTING);
        settingsModule.registerSetting(AttachmentMapper.INDEX_ATTACHMENT_IGNORE_ERRORS_SETTING);
        settingsModule.registerSetting(AttachmentMapper.INDEX_ATTACHMENT_INDEXED_CHARS_SETTING);

View File

@ -24,3 +24,9 @@ dependencies {
    testCompile project(path: ':plugins:ingest-geoip', configuration: 'runtime')
    testCompile project(path: ':modules:lang-mustache', configuration: 'runtime')
}

integTest {
    cluster {
        plugin 'ingest-geoip', project(':plugins:ingest-geoip')
    }
}

View File

@ -1,220 +0,0 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.ingest;
import com.maxmind.geoip2.DatabaseReader;
import org.elasticsearch.common.bytes.BytesArray;
import org.elasticsearch.common.collect.HppcMaps;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentHelper;
import org.elasticsearch.common.xcontent.support.XContentMapValues;
import org.elasticsearch.ingest.core.CompoundProcessor;
import org.elasticsearch.ingest.core.IngestDocument;
import org.elasticsearch.ingest.core.Pipeline;
import org.elasticsearch.ingest.core.Processor;
import org.elasticsearch.ingest.geoip.GeoIpProcessor;
import org.elasticsearch.ingest.geoip.IngestGeoIpPlugin;
import org.elasticsearch.ingest.grok.GrokProcessor;
import org.elasticsearch.ingest.grok.IngestGrokPlugin;
import org.elasticsearch.ingest.processor.AppendProcessor;
import org.elasticsearch.ingest.processor.ConvertProcessor;
import org.elasticsearch.ingest.processor.DateProcessor;
import org.elasticsearch.ingest.processor.ForEachProcessor;
import org.elasticsearch.ingest.processor.LowercaseProcessor;
import org.elasticsearch.ingest.processor.RemoveProcessor;
import org.elasticsearch.ingest.processor.RenameProcessor;
import org.elasticsearch.ingest.processor.SplitProcessor;
import org.elasticsearch.ingest.processor.TrimProcessor;
import org.elasticsearch.ingest.processor.UppercaseProcessor;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.test.StreamsUtils;
import java.io.ByteArrayInputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.notNullValue;
import static org.hamcrest.Matchers.nullValue;
public class CombineProcessorsTests extends ESTestCase {
private static final String LOG = "70.193.17.92 - - [08/Sep/2014:02:54:42 +0000] \"GET /presentations/logstash-scale11x/images/ahhh___rage_face_by_samusmmx-d5g5zap.png HTTP/1.1\" 200 175208 \"http://mobile.rivals.com/board_posts.asp?SID=880&mid=198829575&fid=2208&tid=198829575&Team=&TeamId=&SiteId=\" \"Mozilla/5.0 (Linux; Android 4.2.2; VS980 4G Build/JDQ39B) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.135 Mobile Safari/537.36\"";
public void testLogging() throws Exception {
Path configDir = createTempDir();
Path geoIpConfigDir = configDir.resolve("ingest-geoip");
Files.createDirectories(geoIpConfigDir);
Files.copy(new ByteArrayInputStream(StreamsUtils.copyToBytesFromClasspath("/GeoLite2-City.mmdb")), geoIpConfigDir.resolve("GeoLite2-City.mmdb"));
Map<String, DatabaseReader> databaseReaders = IngestGeoIpPlugin.loadDatabaseReaders(geoIpConfigDir);
Map<String, Object> config = new HashMap<>();
config.put("field", "log");
config.put("pattern", "%{COMBINEDAPACHELOG}");
Processor processor1 = new GrokProcessor.Factory(IngestGrokPlugin.loadBuiltinPatterns()).doCreate(null, config);
config = new HashMap<>();
config.put("field", "response");
config.put("type", "integer");
Processor processor2 = new ConvertProcessor.Factory().create(config);
config = new HashMap<>();
config.put("field", "bytes");
config.put("type", "integer");
Processor processor3 = new ConvertProcessor.Factory().create(config);
config = new HashMap<>();
config.put("match_field", "timestamp");
config.put("target_field", "timestamp");
config.put("match_formats", Arrays.asList("dd/MMM/YYYY:HH:mm:ss Z"));
Processor processor4 = new DateProcessor.Factory().create(config);
config = new HashMap<>();
config.put("source_field", "clientip");
Processor processor5 = new GeoIpProcessor.Factory(databaseReaders).create(config);
Pipeline pipeline = new Pipeline("_id", "_description", new CompoundProcessor(processor1, processor2, processor3, processor4, processor5));
Map<String, Object> source = new HashMap<>();
source.put("log", LOG);
IngestDocument document = new IngestDocument("_index", "_type", "_id", null, null, null, null, source);
pipeline.execute(document);
assertThat(document.getSourceAndMetadata().size(), equalTo(17));
assertThat(document.getSourceAndMetadata().get("request"), equalTo("/presentations/logstash-scale11x/images/ahhh___rage_face_by_samusmmx-d5g5zap.png"));
assertThat(document.getSourceAndMetadata().get("agent"), equalTo("\"Mozilla/5.0 (Linux; Android 4.2.2; VS980 4G Build/JDQ39B) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.135 Mobile Safari/537.36\""));
assertThat(document.getSourceAndMetadata().get("auth"), equalTo("-"));
assertThat(document.getSourceAndMetadata().get("ident"), equalTo("-"));
assertThat(document.getSourceAndMetadata().get("verb"), equalTo("GET"));
assertThat(document.getSourceAndMetadata().get("referrer"), equalTo("\"http://mobile.rivals.com/board_posts.asp?SID=880&mid=198829575&fid=2208&tid=198829575&Team=&TeamId=&SiteId=\""));
assertThat(document.getSourceAndMetadata().get("response"), equalTo(200));
assertThat(document.getSourceAndMetadata().get("bytes"), equalTo(175208));
assertThat(document.getSourceAndMetadata().get("clientip"), equalTo("70.193.17.92"));
assertThat(document.getSourceAndMetadata().get("httpversion"), equalTo("1.1"));
assertThat(document.getSourceAndMetadata().get("rawrequest"), nullValue());
assertThat(document.getSourceAndMetadata().get("timestamp"), equalTo("2014-09-08T02:54:42.000Z"));
Map<String, Object> geoInfo = (Map<String, Object>) document.getSourceAndMetadata().get("geoip");
assertThat(geoInfo.size(), equalTo(5));
assertThat(geoInfo.get("continent_name"), equalTo("North America"));
assertThat(geoInfo.get("city_name"), equalTo("Charlotte"));
assertThat(geoInfo.get("country_iso_code"), equalTo("US"));
assertThat(geoInfo.get("region_name"), equalTo("North Carolina"));
assertThat(geoInfo.get("location"), notNullValue());
}
private static final String PERSON = "{\n" +
" \"age\": 33,\n" +
" \"eyeColor\": \"brown\",\n" +
" \"name\": \"Miranda Goodwin\",\n" +
" \"gender\": \"male\",\n" +
" \"company\": \"ATGEN\",\n" +
" \"email\": \"mirandagoodwin@atgen.com\",\n" +
" \"phone\": \"+1 (914) 489-3656\",\n" +
" \"address\": \"713 Bartlett Place, Accoville, Puerto Rico, 9221\",\n" +
" \"registered\": \"2014-11-23T08:34:21 -01:00\",\n" +
" \"tags\": [\n" +
" \"ex\",\n" +
" \"do\",\n" +
" \"occaecat\",\n" +
" \"reprehenderit\",\n" +
" \"anim\",\n" +
" \"laboris\",\n" +
" \"cillum\"\n" +
" ],\n" +
" \"friends\": [\n" +
" {\n" +
" \"id\": 0,\n" +
" \"name\": \"Wendi Odonnell\"\n" +
" },\n" +
" {\n" +
" \"id\": 1,\n" +
" \"name\": \"Mayra Boyd\"\n" +
" },\n" +
" {\n" +
" \"id\": 2,\n" +
" \"name\": \"Lee Gonzalez\"\n" +
" }\n" +
" ]\n" +
" }";
@SuppressWarnings("unchecked")
public void testMutate() throws Exception {
ProcessorsRegistry.Builder builder = new ProcessorsRegistry.Builder();
builder.registerProcessor("remove", (templateService, registry) -> new RemoveProcessor.Factory(templateService));
builder.registerProcessor("trim", (templateService, registry) -> new TrimProcessor.Factory());
ProcessorsRegistry registry = builder.build(TestTemplateService.instance());
Map<String, Object> config = new HashMap<>();
config.put("field", "friends");
Map<String, Object> removeConfig = new HashMap<>();
removeConfig.put("field", "_value.id");
config.put("processors", Collections.singletonList(Collections.singletonMap("remove", removeConfig)));
ForEachProcessor processor1 = new ForEachProcessor.Factory(registry).create(config);
config = new HashMap<>();
config.put("field", "tags");
config.put("value", "new_value");
AppendProcessor processor2 = new AppendProcessor.Factory(TestTemplateService.instance()).create(config);
config = new HashMap<>();
config.put("field", "address");
config.put("separator", ",");
SplitProcessor processor3 = new SplitProcessor.Factory().create(config);
config = new HashMap<>();
config.put("field", "address");
Map<String, Object> trimConfig = new HashMap<>();
trimConfig.put("field", "_value");
config.put("processors", Collections.singletonList(Collections.singletonMap("trim", trimConfig)));
ForEachProcessor processor4 = new ForEachProcessor.Factory(registry).create(config);
config = new HashMap<>();
config.put("field", "company");
LowercaseProcessor processor5 = new LowercaseProcessor.Factory().create(config);
config = new HashMap<>();
config.put("field", "gender");
UppercaseProcessor processor6 = new UppercaseProcessor.Factory().create(config);
config = new HashMap<>();
config.put("field", "eyeColor");
config.put("to", "eye_color");
RenameProcessor processor7 = new RenameProcessor.Factory().create(config);
Pipeline pipeline = new Pipeline("_id", "_description", new CompoundProcessor(
processor1, processor2, processor3, processor4, processor5, processor6, processor7
));
Map<String, Object> source = XContentHelper.createParser(new BytesArray(PERSON)).map();
IngestDocument document = new IngestDocument("_index", "_type", "_id", null, null, null, null, source);
pipeline.execute(document);
assertThat(((List<Map<String, Object>>) document.getSourceAndMetadata().get("friends")).get(0).get("id"), nullValue());
assertThat(((List<Map<String, Object>>) document.getSourceAndMetadata().get("friends")).get(1).get("id"), nullValue());
assertThat(((List<Map<String, Object>>) document.getSourceAndMetadata().get("friends")).get(2).get("id"), nullValue());
assertThat(document.getFieldValue("tags.7", String.class), equalTo("new_value"));
List<String> addressDetails = document.getFieldValue("address", List.class);
assertThat(addressDetails.size(), equalTo(4));
assertThat(addressDetails.get(0), equalTo("713 Bartlett Place"));
assertThat(addressDetails.get(1), equalTo("Accoville"));
assertThat(addressDetails.get(2), equalTo("Puerto Rico"));
assertThat(addressDetails.get(3), equalTo("9221"));
assertThat(document.getSourceAndMetadata().get("company"), equalTo("atgen"));
assertThat(document.getSourceAndMetadata().get("gender"), equalTo("MALE"));
assertThat(document.getSourceAndMetadata().get("eye_color"), equalTo("brown"));
}
}

View File

@ -45,6 +45,7 @@ public class ValueSourceMustacheIT extends AbstractMustacheTestCase {
valueSource = ValueSource.wrap(Arrays.asList("_value", "{{field1}}"), templateService);
assertThat(valueSource, instanceOf(ValueSource.ListValue.class));
@SuppressWarnings("unchecked")
List<String> result = (List<String>) valueSource.copyAndResolve(model);
assertThat(result.size(), equalTo(2));
assertThat(result.get(0), equalTo("_value"));
@ -56,6 +57,7 @@ public class ValueSourceMustacheIT extends AbstractMustacheTestCase {
map.put("field4", "_value");
valueSource = ValueSource.wrap(map, templateService);
assertThat(valueSource, instanceOf(ValueSource.MapValue.class));
@SuppressWarnings("unchecked")
Map<String, Object> resultMap = (Map<String, Object>) valueSource.copyAndResolve(model);
assertThat(resultMap.size(), equalTo(3));
assertThat(resultMap.get("field1"), equalTo("value1"));

View File

@ -0,0 +1,199 @@
---
"Test logging":
- do:
ingest.put_pipeline:
id: "_id"
body: >
{
"processors": [
{
"grok" : {
"field" : "log",
"pattern": "%{COMBINEDAPACHELOG}"
}
},
{
"convert" : {
"field" : "response",
"type": "integer"
}
},
{
"convert" : {
"field" : "bytes",
"type": "integer"
}
},
{
"date" : {
"match_field" : "timestamp",
"target_field" : "timestamp",
"match_formats" : ["dd/MMM/YYYY:HH:mm:ss Z"]
}
},
{
"geoip" : {
"source_field" : "clientip"
}
}
]
}
- match: { acknowledged: true }
- do:
index:
index: test
type: test
id: 1
pipeline: "_id"
body: {
log: "70.193.17.92 - - [08/Sep/2014:02:54:42 +0000] \"GET /presentations/logstash-scale11x/images/ahhh___rage_face_by_samusmmx-d5g5zap.png HTTP/1.1\" 200 175208 \"http://mobile.rivals.com/board_posts.asp?SID=880&mid=198829575&fid=2208&tid=198829575&Team=&TeamId=&SiteId=\" \"Mozilla/5.0 (Linux; Android 4.2.2; VS980 4G Build/JDQ39B) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.135 Mobile Safari/537.36\""
}
- do:
get:
index: test
type: test
id: 1
- length: { _source: 14 }
- match: { _source.request: "/presentations/logstash-scale11x/images/ahhh___rage_face_by_samusmmx-d5g5zap.png" }
- match: { _source.agent: "\"Mozilla/5.0 (Linux; Android 4.2.2; VS980 4G Build/JDQ39B) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.135 Mobile Safari/537.36\"" }
- match: { _source.auth: "-" }
- match: { _source.verb: "GET" }
- match: { _source.referrer: "\"http://mobile.rivals.com/board_posts.asp?SID=880&mid=198829575&fid=2208&tid=198829575&Team=&TeamId=&SiteId=\"" }
- match: { _source.response: 200 }
- match: { _source.bytes: 175208 }
- match: { _source.clientip: "70.193.17.92" }
- match: { _source.httpversion: "1.1" }
- match: { _source.timestamp: "2014-09-08T02:54:42.000Z" }
- match: { _source.geoip.continent_name: "North America" }
- match: { _source.geoip.city_name: "Charlotte" }
- match: { _source.geoip.country_iso_code: "US" }
- match: { _source.geoip.region_name: "North Carolina" }
---
"Test mutate":
- do:
ingest.put_pipeline:
id: "_id"
body: >
{
"processors": [
{
"foreach" : {
"field" : "friends",
"processors" : [
{
"remove" : {
"field" : "_value.id"
}
}
]
}
},
{
"append" : {
"field" : "tags",
"value": "new_value"
}
},
{
"split" : {
"field" : "address",
"separator": ","
}
},
{
"foreach" : {
"field" : "address",
"processors" : [
{
"trim" : {
"field" : "_value"
}
}
]
}
},
{
"lowercase" : {
"field" : "company"
}
},
{
"uppercase" : {
"field" : "gender"
}
},
{
"rename" : {
"field" : "eyeColor",
"to" : "eye_color"
}
}
]
}
- match: { acknowledged: true }
- do:
index:
index: test
type: test
id: 1
pipeline: "_id"
body: {
"age" : 33,
"eyeColor" : "brown",
"name" : "Miranda Goodwin",
"gender" : "male",
"company": "ATGEN",
"email" : "mirandagoodwin@atgen.com",
"phone": "+1 (914) 489-3656",
"address" : "713 Bartlett Place, Accoville, Puerto Rico, 9221",
"registered": "2014-11-23T08:34:21 -01:00",
"tags" : [
"ex",
"do",
"occaecat",
"reprehenderit",
"anim",
"laboris",
"cillum"
],
"friends": [
{
"id" : 0,
"name" : "Wendi Odonnell"
},
{
"id" : 1,
"name" : "Mayra Boyd"
},
{
"id": 2,
"name": "Lee Gonzalez"
}
]
}
- do:
get:
index: test
type: test
id: 1
- length: { _source: 11 }
- is_false: _source.friends.0.id
- is_false: _source.friends.1.id
- is_false: _source.friends.2.id
- match: { _source.friends.0.name: "Wendi Odonnell" }
- match: { _source.friends.1.name: "Mayra Boyd" }
- match: { _source.friends.2.name: "Lee Gonzalez" }
- match: { _source.tags.7: "new_value" }
- length: { _source.address: 4 }
- match: { _source.address.0: "713 Bartlett Place" }
- match: { _source.address.1: "Accoville" }
- match: { _source.address.2: "Puerto Rico" }
- match: { _source.address.3: "9221" }
- match: { _source.company: "atgen" }
- match: { _source.gender: "MALE" }
- match: { _source.eye_color: "brown" }

View File

@ -80,6 +80,10 @@
"type" : "boolean", "type" : "boolean",
"description" : "Specify whether query terms should be lowercased" "description" : "Specify whether query terms should be lowercased"
}, },
"pipeline": {
"type" : "string",
"description" : "Ingest pipeline to set on index requests made by this action. (default: none)"
},
"preference": { "preference": {
"type" : "string", "type" : "string",
"description" : "Specify the node or shard the operation should be performed on (default: random)" "description" : "Specify the node or shard the operation should be performed on (default: random)"

View File

@ -1,6 +1,23 @@
Test Suite:
===========
[NOTE]
.Required settings
=======================================
Certain tests require specific settings to be applied to the
Elasticsearch instance in order to pass. You should run
Elasticsearch as follows:
[source,sh]
---------------------
bin/elasticsearch --script.inline true --node.testattr test --path.repo /tmp --repositories.url.allowed_urls 'http://snapshot.*'
---------------------
=======================================
Test file structure
--------------------
A YAML test file consists of:
* an optional `setup` section, followed by
* one or more test sections, as in the sketch below
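As a hedged illustration of that layout, here is a minimal test file with one `setup` section and one named test section; the index name, API calls, and assertion are invented for the example rather than taken from an actual suite:

[source,yaml]
---------------------
---
setup:
  - do:
      indices.create:
        index: test_index

---
"Cluster health responds":
  - do:
      cluster.health: {}

  - match: { timed_out: false }
---------------------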