diff --git a/core/src/main/java/org/elasticsearch/index/analysis/AnalysisService.java b/core/src/main/java/org/elasticsearch/index/analysis/AnalysisService.java index 55d6f572c5e..5b7f54289d3 100644 --- a/core/src/main/java/org/elasticsearch/index/analysis/AnalysisService.java +++ b/core/src/main/java/org/elasticsearch/index/analysis/AnalysisService.java @@ -217,22 +217,22 @@ public class AnalysisService extends AbstractIndexComponent implements Closeable Map analyzers = newHashMap(); for (AnalyzerProvider analyzerFactory : analyzerProviders.values()) { /* - * Lucene defaults positionOffsetGap to 0 in all analyzers but - * Elasticsearch defaults them to 0 only before version 2.1 - * and 100 afterwards so we override the positionOffsetGap if it + * Lucene defaults positionIncrementGap to 0 in all analyzers but + * Elasticsearch defaults them to 0 only before version 2.0 + * and 100 afterwards so we override the positionIncrementGap if it * doesn't match here. */ - int overridePositionOffsetGap = StringFieldMapper.Defaults.positionOffsetGap(Version.indexCreated(indexSettings)); + int overridePositionIncrementGap = StringFieldMapper.Defaults.positionIncrementGap(Version.indexCreated(indexSettings)); if (analyzerFactory instanceof CustomAnalyzerProvider) { ((CustomAnalyzerProvider) analyzerFactory).build(this); /* * Custom analyzers already default to the correct, version - * dependent positionOffsetGap and the user is be able to - * configure the positionOffsetGap directly on the analyzer so - * we disable overriding the positionOffsetGap to preserve the + * dependent positionIncrementGap and the user is able to + * configure the positionIncrementGap directly on the analyzer so + * we disable overriding the positionIncrementGap to preserve the * user's setting. 
*/ - overridePositionOffsetGap = Integer.MIN_VALUE; + overridePositionIncrementGap = Integer.MIN_VALUE; } Analyzer analyzerF = analyzerFactory.get(); if (analyzerF == null) { @@ -242,12 +242,12 @@ public class AnalysisService extends AbstractIndexComponent implements Closeable if (analyzerF instanceof NamedAnalyzer) { // if we got a named analyzer back, use it... analyzer = (NamedAnalyzer) analyzerF; - if (overridePositionOffsetGap >= 0 && analyzer.getPositionIncrementGap(analyzer.name()) != overridePositionOffsetGap) { - // unless the positionOffsetGap needs to be overridden - analyzer = new NamedAnalyzer(analyzer, overridePositionOffsetGap); + if (overridePositionIncrementGap >= 0 && analyzer.getPositionIncrementGap(analyzer.name()) != overridePositionIncrementGap) { + // unless the positionIncrementGap needs to be overridden + analyzer = new NamedAnalyzer(analyzer, overridePositionIncrementGap); } } else { - analyzer = new NamedAnalyzer(analyzerFactory.name(), analyzerFactory.scope(), analyzerF, overridePositionOffsetGap); + analyzer = new NamedAnalyzer(analyzerFactory.name(), analyzerFactory.scope(), analyzerF, overridePositionIncrementGap); } analyzers.put(analyzerFactory.name(), analyzer); analyzers.put(Strings.toCamelCase(analyzerFactory.name()), analyzer); diff --git a/core/src/main/java/org/elasticsearch/index/analysis/CustomAnalyzer.java b/core/src/main/java/org/elasticsearch/index/analysis/CustomAnalyzer.java index 97cd57e4ce8..b68a321359e 100644 --- a/core/src/main/java/org/elasticsearch/index/analysis/CustomAnalyzer.java +++ b/core/src/main/java/org/elasticsearch/index/analysis/CustomAnalyzer.java @@ -44,11 +44,11 @@ public final class CustomAnalyzer extends Analyzer { } public CustomAnalyzer(TokenizerFactory tokenizerFactory, CharFilterFactory[] charFilters, TokenFilterFactory[] tokenFilters, - int positionOffsetGap, int offsetGap) { + int positionIncrementGap, int offsetGap) { this.tokenizerFactory = tokenizerFactory; this.charFilters = charFilters; 
this.tokenFilters = tokenFilters; - this.positionIncrementGap = positionOffsetGap; + this.positionIncrementGap = positionIncrementGap; this.offsetGap = offsetGap; } diff --git a/core/src/main/java/org/elasticsearch/index/analysis/CustomAnalyzerProvider.java b/core/src/main/java/org/elasticsearch/index/analysis/CustomAnalyzerProvider.java index d3599dbceef..1c54e2cef8c 100644 --- a/core/src/main/java/org/elasticsearch/index/analysis/CustomAnalyzerProvider.java +++ b/core/src/main/java/org/elasticsearch/index/analysis/CustomAnalyzerProvider.java @@ -79,14 +79,28 @@ public class CustomAnalyzerProvider extends AbstractIndexAnalyzerProvider 0) { + if (positionIncrementGap > 0) { // No match across gaps when slop < position gap - assertHitCount(client.prepareSearch(indexName).setQuery(matchPhraseQuery("string", "one two").slop(positionOffsetGap - 1)).get(), + assertHitCount(client.prepareSearch(indexName).setQuery(matchPhraseQuery("string", "one two").slop(positionIncrementGap - 1)).get(), 0); } // Match across gaps when slop >= position gap - assertHitCount(client.prepareSearch(indexName).setQuery(matchPhraseQuery("string", "one two").slop(positionOffsetGap)).get(), 1); - assertHitCount(client.prepareSearch(indexName).setQuery(matchPhraseQuery("string", "one two").slop(positionOffsetGap + 1)).get(), 1); + assertHitCount(client.prepareSearch(indexName).setQuery(matchPhraseQuery("string", "one two").slop(positionIncrementGap)).get(), 1); + assertHitCount(client.prepareSearch(indexName).setQuery(matchPhraseQuery("string", "one two").slop(positionIncrementGap + 1)).get(), 1); } } diff --git a/core/src/test/java/org/elasticsearch/indices/analyze/AnalyzeActionIT.java b/core/src/test/java/org/elasticsearch/indices/analyze/AnalyzeActionIT.java index db46e8201d4..c632fa24668 100644 --- a/core/src/test/java/org/elasticsearch/indices/analyze/AnalyzeActionIT.java +++ b/core/src/test/java/org/elasticsearch/indices/analyze/AnalyzeActionIT.java @@ -253,7 +253,7 @@ public class 
AnalyzeActionIT extends ESIntegTestCase { ensureGreen(); client().admin().indices().preparePutMapping("test") - .setType("document").setSource("simple", "type=string,analyzer=simple,position_offset_gap=100").get(); + .setType("document").setSource("simple", "type=string,analyzer=simple,position_increment_gap=100").get(); String[] texts = new String[]{"THIS IS A TEST", "THE SECOND TEXT"}; diff --git a/docs/reference/analysis/analyzers/custom-analyzer.asciidoc b/docs/reference/analysis/analyzers/custom-analyzer.asciidoc index d11cb7f95b6..46111e32a17 100644 --- a/docs/reference/analysis/analyzers/custom-analyzer.asciidoc +++ b/docs/reference/analysis/analyzers/custom-analyzer.asciidoc @@ -20,7 +20,7 @@ filters. |`char_filter` |An optional list of logical / registered name of char filters. -|`position_offset_gap` |An optional number of positions to increment +|`position_increment_gap` |An optional number of positions to increment between each field value of a field using this analyzer. Defaults to 100. 100 was chosen because it prevents phrase queries with reasonably large slops (less than 100) from matching terms across field values. 
@@ -38,7 +38,7 @@ index : tokenizer : myTokenizer1 filter : [myTokenFilter1, myTokenFilter2] char_filter : [my_html] - position_offset_gap: 256 + position_increment_gap: 256 tokenizer : myTokenizer1 : type : standard diff --git a/docs/reference/mapping/params.asciidoc b/docs/reference/mapping/params.asciidoc index 119ce820ee4..719fa00acf3 100644 --- a/docs/reference/mapping/params.asciidoc +++ b/docs/reference/mapping/params.asciidoc @@ -28,7 +28,7 @@ The following mapping parameters are common to some or all field datatypes: * <> * <> * <> -* <> +* <> * <> * <> * <> @@ -78,7 +78,7 @@ include::params/norms.asciidoc[] include::params/null-value.asciidoc[] -include::params/position-offset-gap.asciidoc[] +include::params/position-increment-gap.asciidoc[] include::params/precision-step.asciidoc[] diff --git a/docs/reference/mapping/params/position-offset-gap.asciidoc b/docs/reference/mapping/params/position-increment-gap.asciidoc similarity index 86% rename from docs/reference/mapping/params/position-offset-gap.asciidoc rename to docs/reference/mapping/params/position-increment-gap.asciidoc index d22dbd8bb1e..918e3d493a5 100644 --- a/docs/reference/mapping/params/position-offset-gap.asciidoc +++ b/docs/reference/mapping/params/position-increment-gap.asciidoc @@ -1,5 +1,5 @@ -[[position-offset-gap]] -=== `position_offset_gap` +[[position-increment-gap]] +=== `position_increment_gap` <> string fields take term <> into account, in order to be able to support @@ -30,7 +30,7 @@ GET /my_index/groups/_search // AUTOSENSE <1> This phrase query matches our document, even though `Abraham` and `Lincoln` are in separate strings. -The `position_offset_gap` can introduce a fake gap between each array element. +The `position_increment_gap` can introduce a fake gap between each array element. 
For instance: [source,js] -------------------------------------------------- @@ -41,7 +41,7 @@ PUT my_index "properties": { "names": { "type": "string", - "position_offset_gap": 50 <1> + "position_increment_gap": 50 <1> } } } @@ -67,7 +67,7 @@ GET /my_index/groups/_search last term in the previous array element. <2> The phrase query no longer matches our document. -TIP: The `position_offset_gap` setting is allowed to have different settings +TIP: The `position_increment_gap` setting is allowed to have different settings for fields of the same name in the same index. Its value can be updated on existing fields using the <>. diff --git a/docs/reference/mapping/types/string.asciidoc b/docs/reference/mapping/types/string.asciidoc index ef497feb5a6..d5d7b7a0fce 100644 --- a/docs/reference/mapping/types/string.asciidoc +++ b/docs/reference/mapping/types/string.asciidoc @@ -141,12 +141,12 @@ Defaults depend on the <> setting: values. Defaults to `null`, which means the field is treated as missing. If the field is `analyzed`, the `null_value` will also be analyzed. -<>:: +<>:: The number of fake term positions which should be inserted between each element of an array of strings. Defaults to 0. The number of fake term position which should be inserted between each - element of an array of strings. Defaults to the position_offset_gap + element of an array of strings. Defaults to the position_increment_gap configured on the analyzer which defaults to 100. 100 was chosen because it prevents phrase queries with reasonably large slops (less than 100) from matching terms across field values. diff --git a/docs/reference/migration/migrate_2_0/mapping.asciidoc b/docs/reference/migration/migrate_2_0/mapping.asciidoc index 67cd2141092..edeb243a9ca 100644 --- a/docs/reference/migration/migrate_2_0/mapping.asciidoc +++ b/docs/reference/migration/migrate_2_0/mapping.asciidoc @@ -385,9 +385,14 @@ default. If you would like to increase compression levels, use the new <> setting instead. 
==== position_offset_gap -The default `position_offset_gap` is now 100. Indexes created in Elasticsearch + +The `position_offset_gap` option is renamed to `position_increment_gap`. This was +done to clear up confusion: Elasticsearch's `position_increment_gap` now +maps directly to Lucene's position increment gap. + +The default `position_increment_gap` is now 100. Indexes created in Elasticsearch 2.0.0 will default to using 100 and indexes created before that will continue to use the old default of 0. This was done to prevent phrase queries from matching across different values of the same term unexpectedly. Specifically, 100 was chosen to cause phrase queries with slops up to 99 to match only within -a single value of a field. +a single value of a field. \ No newline at end of file diff --git a/docs/reference/migration/migrate_2_1.asciidoc b/docs/reference/migration/migrate_2_1.asciidoc index 7542fb3d1df..906d51afdb7 100644 --- a/docs/reference/migration/migrate_2_1.asciidoc +++ b/docs/reference/migration/migrate_2_1.asciidoc @@ -24,4 +24,4 @@ GET /my_index/_search?scroll=2m Scroll requests sorted by `_doc` have been optimized to more efficiently resume from where the previous request stopped, so this will have the same performance -characteristics as the former `scan` search type. +characteristics as the former `scan` search type. \ No newline at end of file