The name "position_offset_gap" is confusing because Lucene has three
similar-sounding things:

* Analyzer#getPositionIncrementGap
* Analyzer#getOffsetGap
* IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS and
  FieldType#storeTermVectorOffsets

Rename position_offset_gap to position_increment_gap.

Closes #13056
parent 5579b8fad9
commit fb2be6d6a1
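For context, here is a minimal illustrative sketch (not part of this commit) of the two Lucene Analyzer hooks whose similar names motivated the rename. It assumes the Lucene 5.x Analyzer API that Elasticsearch 2.x builds against; the class name, tokenizer choice, and return values are hypothetical.

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;

// Hypothetical demo analyzer, only to show which hook does what.
public class GapDemoAnalyzer extends Analyzer {

    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
        return new TokenStreamComponents(new WhitespaceTokenizer());
    }

    @Override
    public int getPositionIncrementGap(String fieldName) {
        // Extra position increments inserted between multiple values of the same
        // field; this is the gap that the position_increment_gap setting names.
        return 100;
    }

    @Override
    public int getOffsetGap(String fieldName) {
        // Extra character offsets added between values (used for highlighting);
        // unrelated to term positions, hence the confusion with the old name.
        return 1;
    }
}

getPositionIncrementGap controls term positions, which is what phrase-query slop sees; getOffsetGap, IndexOptions offsets and stored term-vector offsets all deal with character offsets, which is why position_offset_gap was a misleading name for a position gap.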
@@ -217,22 +217,22 @@ public class AnalysisService extends AbstractIndexComponent implements Closeable
 Map<String, NamedAnalyzer> analyzers = newHashMap();
 for (AnalyzerProvider analyzerFactory : analyzerProviders.values()) {
 /*
- * Lucene defaults positionOffsetGap to 0 in all analyzers but
- * Elasticsearch defaults them to 0 only before version 2.1
- * and 100 afterwards so we override the positionOffsetGap if it
+ * Lucene defaults positionIncrementGap to 0 in all analyzers but
+ * Elasticsearch defaults them to 0 only before version 2.0
+ * and 100 afterwards so we override the positionIncrementGap if it
 * doesn't match here.
 */
-int overridePositionOffsetGap = StringFieldMapper.Defaults.positionOffsetGap(Version.indexCreated(indexSettings));
+int overridePositionIncrementGap = StringFieldMapper.Defaults.positionIncrementGap(Version.indexCreated(indexSettings));
 if (analyzerFactory instanceof CustomAnalyzerProvider) {
 ((CustomAnalyzerProvider) analyzerFactory).build(this);
 /*
 * Custom analyzers already default to the correct, version
- * dependent positionOffsetGap and the user is be able to
- * configure the positionOffsetGap directly on the analyzer so
- * we disable overriding the positionOffsetGap to preserve the
+ * dependent positionIncrementGap and the user is be able to
+ * configure the positionIncrementGap directly on the analyzer so
+ * we disable overriding the positionIncrementGap to preserve the
 * user's setting.
 */
-overridePositionOffsetGap = Integer.MIN_VALUE;
+overridePositionIncrementGap = Integer.MIN_VALUE;
 }
 Analyzer analyzerF = analyzerFactory.get();
 if (analyzerF == null) {
@@ -242,12 +242,12 @@ public class AnalysisService extends AbstractIndexComponent implements Closeable
 if (analyzerF instanceof NamedAnalyzer) {
 // if we got a named analyzer back, use it...
 analyzer = (NamedAnalyzer) analyzerF;
-if (overridePositionOffsetGap >= 0 && analyzer.getPositionIncrementGap(analyzer.name()) != overridePositionOffsetGap) {
-// unless the positionOffsetGap needs to be overridden
-analyzer = new NamedAnalyzer(analyzer, overridePositionOffsetGap);
+if (overridePositionIncrementGap >= 0 && analyzer.getPositionIncrementGap(analyzer.name()) != overridePositionIncrementGap) {
+// unless the positionIncrementGap needs to be overridden
+analyzer = new NamedAnalyzer(analyzer, overridePositionIncrementGap);
 }
 } else {
-analyzer = new NamedAnalyzer(analyzerFactory.name(), analyzerFactory.scope(), analyzerF, overridePositionOffsetGap);
+analyzer = new NamedAnalyzer(analyzerFactory.name(), analyzerFactory.scope(), analyzerF, overridePositionIncrementGap);
 }
 analyzers.put(analyzerFactory.name(), analyzer);
 analyzers.put(Strings.toCamelCase(analyzerFactory.name()), analyzer);
@@ -44,11 +44,11 @@ public final class CustomAnalyzer extends Analyzer {
 }

 public CustomAnalyzer(TokenizerFactory tokenizerFactory, CharFilterFactory[] charFilters, TokenFilterFactory[] tokenFilters,
-int positionOffsetGap, int offsetGap) {
+int positionIncrementGap, int offsetGap) {
 this.tokenizerFactory = tokenizerFactory;
 this.charFilters = charFilters;
 this.tokenFilters = tokenFilters;
-this.positionIncrementGap = positionOffsetGap;
+this.positionIncrementGap = positionIncrementGap;
 this.offsetGap = offsetGap;
 }

@@ -79,14 +79,28 @@ public class CustomAnalyzerProvider extends AbstractIndexAnalyzerProvider<Custom
 tokenFilters.add(tokenFilter);
 }

-int positionOffsetGapDefault = StringFieldMapper.Defaults.positionOffsetGap(Version.indexCreated(indexSettings));
-int positionOffsetGap = analyzerSettings.getAsInt("position_offset_gap", positionOffsetGapDefault);
-int offsetGap = analyzerSettings.getAsInt("offset_gap", -1);
+int positionIncrementGap = StringFieldMapper.Defaults.positionIncrementGap(Version.indexCreated(indexSettings));
+
+if (analyzerSettings.getAsMap().containsKey("position_offset_gap")){
+if (Version.indexCreated(indexSettings).before(Version.V_2_0_0)){
+if (analyzerSettings.getAsMap().containsKey("position_increment_gap")){
+throw new IllegalArgumentException("Custom Analyzer [" + name() +
+"] defined both [position_offset_gap] and [position_increment_gap], use only [position_increment_gap]");
+}
+positionIncrementGap = analyzerSettings.getAsInt("position_offset_gap", positionIncrementGap);
+}else {
+throw new IllegalArgumentException("Option [position_offset_gap] in Custom Analyzer [" + name() +
+"] has been renamed, please use [position_increment_gap] instead.");
+}
+}
+
+positionIncrementGap = analyzerSettings.getAsInt("position_increment_gap", positionIncrementGap);
+
+int offsetGap = analyzerSettings.getAsInt("offset_gap", -1);;
 this.customAnalyzer = new CustomAnalyzer(tokenizer,
 charFilters.toArray(new CharFilterFactory[charFilters.size()]),
 tokenFilters.toArray(new TokenFilterFactory[tokenFilters.size()]),
-positionOffsetGap,
+positionIncrementGap,
 offsetGap
 );
 }
@@ -31,10 +31,10 @@ public class NamedAnalyzer extends DelegatingAnalyzerWrapper {
 private final String name;
 private final AnalyzerScope scope;
 private final Analyzer analyzer;
-private final int positionOffsetGap;
+private final int positionIncrementGap;

-public NamedAnalyzer(NamedAnalyzer analyzer, int positionOffsetGap) {
-this(analyzer.name(), analyzer.scope(), analyzer.analyzer(), positionOffsetGap);
+public NamedAnalyzer(NamedAnalyzer analyzer, int positionIncrementGap) {
+this(analyzer.name(), analyzer.scope(), analyzer.analyzer(), positionIncrementGap);
 }

 public NamedAnalyzer(String name, Analyzer analyzer) {
@@ -45,12 +45,12 @@ public class NamedAnalyzer extends DelegatingAnalyzerWrapper {
 this(name, scope, analyzer, Integer.MIN_VALUE);
 }

-public NamedAnalyzer(String name, AnalyzerScope scope, Analyzer analyzer, int positionOffsetGap) {
+public NamedAnalyzer(String name, AnalyzerScope scope, Analyzer analyzer, int positionIncrementGap) {
 super(ERROR_STRATEGY);
 this.name = name;
 this.scope = scope;
 this.analyzer = analyzer;
-this.positionOffsetGap = positionOffsetGap;
+this.positionIncrementGap = positionIncrementGap;
 }

 /**
@@ -81,8 +81,8 @@ public class NamedAnalyzer extends DelegatingAnalyzerWrapper {

 @Override
 public int getPositionIncrementGap(String fieldName) {
-if (positionOffsetGap != Integer.MIN_VALUE) {
-return positionOffsetGap;
+if (positionIncrementGap != Integer.MIN_VALUE) {
+return positionIncrementGap;
 }
 return super.getPositionIncrementGap(fieldName);
 }
@@ -53,7 +53,7 @@ import static org.elasticsearch.index.mapper.core.TypeParsers.parseMultiField;
 public class StringFieldMapper extends FieldMapper implements AllFieldMapper.IncludeInAll {

 public static final String CONTENT_TYPE = "string";
-private static final int POSITION_OFFSET_GAP_USE_ANALYZER = -1;
+private static final int POSITION_INCREMENT_GAP_USE_ANALYZER = -1;

 public static class Defaults {
 public static final MappedFieldType FIELD_TYPE = new StringFieldType();
@@ -64,23 +64,25 @@ public class StringFieldMapper extends FieldMapper implements AllFieldMapper.Inc

 // NOTE, when adding defaults here, make sure you add them in the builder
 public static final String NULL_VALUE = null;

 /**
- * Post 2.0 default for position_offset_gap. Set to 100 so that
+ * Post 2.0 default for position_increment_gap. Set to 100 so that
 * phrase queries of reasonably high slop will not match across field
 * values.
 */
-public static final int POSITION_OFFSET_GAP = 100;
-public static final int POSITION_OFFSET_GAP_PRE_2_0 = 0;
+public static final int POSITION_INCREMENT_GAP = 100;
+public static final int POSITION_INCREMENT_GAP_PRE_2_0 = 0;

 public static final int IGNORE_ABOVE = -1;

 /**
- * The default position_offset_gap for a particular version of Elasticsearch.
+ * The default position_increment_gap for a particular version of Elasticsearch.
 */
-public static int positionOffsetGap(Version version) {
+public static int positionIncrementGap(Version version) {
 if (version.before(Version.V_2_0_0_beta1)) {
-return POSITION_OFFSET_GAP_PRE_2_0;
+return POSITION_INCREMENT_GAP_PRE_2_0;
 }
-return POSITION_OFFSET_GAP;
+return POSITION_INCREMENT_GAP;
 }
 }

@@ -90,10 +92,10 @@ public class StringFieldMapper extends FieldMapper implements AllFieldMapper.Inc

 /**
 * The distance between tokens from different values in the same field.
- * POSITION_OFFSET_GAP_USE_ANALYZER means default to the analyzer's
- * setting which in turn defaults to Defaults.POSITION_OFFSET_GAP.
+ * POSITION_INCREMENT_GAP_USE_ANALYZER means default to the analyzer's
+ * setting which in turn defaults to Defaults.POSITION_INCREMENT_GAP.
 */
-protected int positionOffsetGap = POSITION_OFFSET_GAP_USE_ANALYZER;
+protected int positionIncrementGap = POSITION_INCREMENT_GAP_USE_ANALYZER;

 protected int ignoreAbove = Defaults.IGNORE_ABOVE;

@@ -108,8 +110,8 @@ public class StringFieldMapper extends FieldMapper implements AllFieldMapper.Inc
 return this;
 }

-public Builder positionOffsetGap(int positionOffsetGap) {
-this.positionOffsetGap = positionOffsetGap;
+public Builder positionIncrementGap(int positionIncrementGap) {
+this.positionIncrementGap = positionIncrementGap;
 return this;
 }

@@ -125,10 +127,10 @@ public class StringFieldMapper extends FieldMapper implements AllFieldMapper.Inc

 @Override
 public StringFieldMapper build(BuilderContext context) {
-if (positionOffsetGap != POSITION_OFFSET_GAP_USE_ANALYZER) {
-fieldType.setIndexAnalyzer(new NamedAnalyzer(fieldType.indexAnalyzer(), positionOffsetGap));
-fieldType.setSearchAnalyzer(new NamedAnalyzer(fieldType.searchAnalyzer(), positionOffsetGap));
-fieldType.setSearchQuoteAnalyzer(new NamedAnalyzer(fieldType.searchQuoteAnalyzer(), positionOffsetGap));
+if (positionIncrementGap != POSITION_INCREMENT_GAP_USE_ANALYZER) {
+fieldType.setIndexAnalyzer(new NamedAnalyzer(fieldType.indexAnalyzer(), positionIncrementGap));
+fieldType.setSearchAnalyzer(new NamedAnalyzer(fieldType.searchAnalyzer(), positionIncrementGap));
+fieldType.setSearchQuoteAnalyzer(new NamedAnalyzer(fieldType.searchQuoteAnalyzer(), positionIncrementGap));
 }
 // if the field is not analyzed, then by default, we should omit norms and have docs only
 // index options, as probably what the user really wants
@@ -147,7 +149,7 @@ public class StringFieldMapper extends FieldMapper implements AllFieldMapper.Inc
 }
 setupFieldType(context);
 StringFieldMapper fieldMapper = new StringFieldMapper(
-name, fieldType, defaultFieldType, positionOffsetGap, ignoreAbove,
+name, fieldType, defaultFieldType, positionIncrementGap, ignoreAbove,
 context.indexSettings(), multiFieldsBuilder.build(this, context), copyTo);
 fieldMapper.includeInAll(includeInAll);
 return fieldMapper;
@@ -176,14 +178,15 @@ public class StringFieldMapper extends FieldMapper implements AllFieldMapper.Inc
 }
 builder.searchQuotedAnalyzer(analyzer);
 iterator.remove();
-} else if (propName.equals("position_offset_gap")) {
-int newPositionOffsetGap = XContentMapValues.nodeIntegerValue(propNode, -1);
-if (newPositionOffsetGap < 0) {
-throw new MapperParsingException("positions_offset_gap less than 0 aren't allowed.");
+} else if (propName.equals("position_increment_gap") ||
+parserContext.indexVersionCreated().before(Version.V_2_0_0) && propName.equals("position_offset_gap")) {
+int newPositionIncrementGap = XContentMapValues.nodeIntegerValue(propNode, -1);
+if (newPositionIncrementGap < 0) {
+throw new MapperParsingException("positions_increment_gap less than 0 aren't allowed.");
 }
-builder.positionOffsetGap(newPositionOffsetGap);
+builder.positionIncrementGap(newPositionIncrementGap);
 // we need to update to actual analyzers if they are not set in this case...
-// so we can inject the position offset gap...
+// so we can inject the position increment gap...
 if (builder.fieldType().indexAnalyzer() == null) {
 builder.fieldType().setIndexAnalyzer(parserContext.analysisService().defaultIndexAnalyzer());
 }
@@ -240,17 +243,17 @@ public class StringFieldMapper extends FieldMapper implements AllFieldMapper.Inc
 }

 private Boolean includeInAll;
-private int positionOffsetGap;
+private int positionIncrementGap;
 private int ignoreAbove;

 protected StringFieldMapper(String simpleName, MappedFieldType fieldType, MappedFieldType defaultFieldType,
-int positionOffsetGap, int ignoreAbove,
+int positionIncrementGap, int ignoreAbove,
 Settings indexSettings, MultiFields multiFields, CopyTo copyTo) {
 super(simpleName, fieldType, defaultFieldType, indexSettings, multiFields, copyTo);
 if (fieldType.tokenized() && fieldType.indexOptions() != NONE && fieldType().hasDocValues()) {
 throw new MapperParsingException("Field [" + fieldType.names().fullName() + "] cannot be analyzed and have doc values");
 }
-this.positionOffsetGap = positionOffsetGap;
+this.positionIncrementGap = positionIncrementGap;
 this.ignoreAbove = ignoreAbove;
 }

@@ -278,8 +281,8 @@ public class StringFieldMapper extends FieldMapper implements AllFieldMapper.Inc
 return true;
 }

-public int getPositionOffsetGap() {
-return this.positionOffsetGap;
+public int getPositionIncrementGap() {
+return this.positionIncrementGap;
 }

 public int getIgnoreAbove() {
@@ -381,8 +384,8 @@ public class StringFieldMapper extends FieldMapper implements AllFieldMapper.Inc
 builder.field("include_in_all", false);
 }

-if (includeDefaults || positionOffsetGap != POSITION_OFFSET_GAP_USE_ANALYZER) {
-builder.field("position_offset_gap", positionOffsetGap);
+if (includeDefaults || positionIncrementGap != POSITION_INCREMENT_GAP_USE_ANALYZER) {
+builder.field("position_increment_gap", positionIncrementGap);
 }
 NamedAnalyzer searchQuoteAnalyzer = fieldType().searchQuoteAnalyzer();
 if (searchQuoteAnalyzer != null && !searchQuoteAnalyzer.name().equals(fieldType().searchAnalyzer().name())) {
@@ -41,7 +41,7 @@ import org.elasticsearch.common.util.MultiDataPathUpgrader;
 import org.elasticsearch.common.xcontent.XContentHelper;
 import org.elasticsearch.env.NodeEnvironment;
 import org.elasticsearch.index.engine.EngineConfig;
-import org.elasticsearch.index.mapper.string.StringFieldMapperPositionOffsetGapTests;
+import org.elasticsearch.index.mapper.string.StringFieldMapperPositionIncrementGapTests;
 import org.elasticsearch.index.query.QueryBuilders;
 import org.elasticsearch.index.shard.MergePolicyConfig;
 import org.elasticsearch.indices.recovery.RecoverySettings;
@@ -332,7 +332,7 @@ public class OldIndexBackwardsCompatibilityIT extends ESIntegTestCase {
 assertNewReplicasWork(indexName);
 assertUpgradeWorks(indexName, isLatestLuceneVersion(version));
 assertDeleteByQueryWorked(indexName, version);
-assertPositionOffsetGapDefaults(indexName, version);
+assertPositionIncrementGapDefaults(indexName, version);
 unloadIndex(indexName);
 }

@@ -445,11 +445,11 @@ public class OldIndexBackwardsCompatibilityIT extends ESIntegTestCase {
 assertEquals(0, searchReq.get().getHits().getTotalHits());
 }

-void assertPositionOffsetGapDefaults(String indexName, Version version) throws Exception {
+void assertPositionIncrementGapDefaults(String indexName, Version version) throws Exception {
 if (version.before(Version.V_2_0_0_beta1)) {
-StringFieldMapperPositionOffsetGapTests.assertGapIsZero(client(), indexName, "doc");
+StringFieldMapperPositionIncrementGapTests.assertGapIsZero(client(), indexName, "doc");
 } else {
-StringFieldMapperPositionOffsetGapTests.assertGapIsOneHundred(client(), indexName, "doc");
+StringFieldMapperPositionIncrementGapTests.assertGapIsOneHundred(client(), indexName, "doc");
 }
 }

@@ -42,8 +42,8 @@ import org.elasticsearch.index.analysis.filter1.MyFilterTokenFilterFactory;
 import org.elasticsearch.index.settings.IndexSettingsModule;
 import org.elasticsearch.indices.analysis.IndicesAnalysisService;
 import org.elasticsearch.test.ESTestCase;
+import org.elasticsearch.test.VersionUtils;
 import org.hamcrest.MatcherAssert;
-import org.junit.Test;

 import java.io.BufferedWriter;
 import java.io.IOException;
@@ -87,26 +87,22 @@ public class AnalysisModuleTests extends ESTestCase {

 }

-@Test
 public void testSimpleConfigurationJson() {
 Settings settings = loadFromClasspath("/org/elasticsearch/index/analysis/test1.json");
 testSimpleConfiguration(settings);
 }

-@Test
 public void testSimpleConfigurationYaml() {
 Settings settings = loadFromClasspath("/org/elasticsearch/index/analysis/test1.yml");
 testSimpleConfiguration(settings);
 }

-@Test
 public void testDefaultFactoryTokenFilters() throws IOException {
 assertTokenFilter("keyword_repeat", KeywordRepeatFilter.class);
 assertTokenFilter("persian_normalization", PersianNormalizationFilter.class);
 assertTokenFilter("arabic_normalization", ArabicNormalizationFilter.class);
 }

-@Test
 public void testVersionedAnalyzers() throws Exception {
 String yaml = "/org/elasticsearch/index/analysis/test1.yml";
 Settings settings2 = settingsBuilder()
@@ -164,7 +160,7 @@ public class AnalysisModuleTests extends ESTestCase {
 // html = (HtmlStripCharFilterFactory) custom2.charFilters()[1];
 // assertThat(html.readAheadLimit(), equalTo(1024));

-// verify position offset gap
+// verify position increment gap
 analyzer = analysisService.analyzer("custom6").analyzer();
 assertThat(analyzer, instanceOf(CustomAnalyzer.class));
 CustomAnalyzer custom6 = (CustomAnalyzer) analyzer;
@@ -215,7 +211,6 @@ public class AnalysisModuleTests extends ESTestCase {
 // MatcherAssert.assertThat(wordList, hasItems("donau", "dampf", "schiff", "spargel", "creme", "suppe"));
 }

-@Test
 public void testWordListPath() throws Exception {
 Settings settings = Settings.builder()
 .put("path.home", createTempDir().toString())
@@ -243,7 +238,6 @@ public class AnalysisModuleTests extends ESTestCase {
 return wordListFile;
 }

-@Test
 public void testUnderscoreInAnalyzerName() {
 Settings settings = Settings.builder()
 .put("index.analysis.analyzer._invalid_name.tokenizer", "keyword")
@@ -259,7 +253,6 @@ public class AnalysisModuleTests extends ESTestCase {
 }
 }

-@Test
 public void testUnderscoreInAnalyzerNameAlias() {
 Settings settings = Settings.builder()
 .put("index.analysis.analyzer.valid_name.tokenizer", "keyword")
@@ -275,4 +268,61 @@
 assertThat(e.getCause().getMessage(), equalTo("analyzer name must not start with '_'. got \"_invalid_name\""));
 }
 }
+
+public void testBackwardCompatible() {
+Settings settings = settingsBuilder()
+.put("index.analysis.analyzer.custom1.tokenizer", "standard")
+.put("index.analysis.analyzer.custom1.position_offset_gap", "128")
+.put("index.analysis.analyzer.custom2.tokenizer", "standard")
+.put("index.analysis.analyzer.custom2.position_increment_gap", "256")
+.put("path.home", createTempDir().toString())
+.put(IndexMetaData.SETTING_VERSION_CREATED, VersionUtils.randomVersionBetween(random(), Version.V_1_0_0,
+Version.V_1_7_1))
+.build();
+AnalysisService analysisService = getAnalysisService(settings);
+
+Analyzer custom1 = analysisService.analyzer("custom1").analyzer();
+assertThat(custom1, instanceOf(CustomAnalyzer.class));
+assertThat(custom1.getPositionIncrementGap("custom1"), equalTo(128));
+
+Analyzer custom2 = analysisService.analyzer("custom2").analyzer();
+assertThat(custom2, instanceOf(CustomAnalyzer.class));
+assertThat(custom2.getPositionIncrementGap("custom2"), equalTo(256));
+}
+
+public void testWithBothSettings() {
+Settings settings = settingsBuilder()
+.put("index.analysis.analyzer.custom.tokenizer", "standard")
+.put("index.analysis.analyzer.custom.position_offset_gap", "128")
+.put("index.analysis.analyzer.custom.position_increment_gap", "256")
+.put("path.home", createTempDir().toString())
+.put(IndexMetaData.SETTING_VERSION_CREATED, VersionUtils.randomVersionBetween(random(), Version.V_1_0_0,
+Version.V_1_7_1))
+.build();
+try {
+getAnalysisService(settings);
+fail("Analyzer has both position_offset_gap and position_increment_gap should fail");
+} catch (ProvisionException e) {
+assertTrue(e.getCause() instanceof IllegalArgumentException);
+assertThat(e.getCause().getMessage(), equalTo("Custom Analyzer [custom] defined both [position_offset_gap] and [position_increment_gap]" +
+", use only [position_increment_gap]"));
+}
+}
+
+public void testDeprecatedPositionOffsetGap() {
+Settings settings = settingsBuilder()
+.put("index.analysis.analyzer.custom.tokenizer", "standard")
+.put("index.analysis.analyzer.custom.position_offset_gap", "128")
+.put("path.home", createTempDir().toString())
+.put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
+.build();
+try {
+getAnalysisService(settings);
+fail("Analyzer should fail if it has position_offset_gap");
+} catch (ProvisionException e) {
+assertTrue(e.getCause() instanceof IllegalArgumentException);
+assertThat(e.getCause().getMessage(), equalTo("Option [position_offset_gap] in Custom Analyzer [custom] " +
+"has been renamed, please use [position_increment_gap] instead."));
+}
+}
 }
@@ -68,7 +68,7 @@
 },
 "custom6":{
 "tokenizer":"standard",
-"position_offset_gap": 256
+"position_increment_gap": 256
 },
 "czechAnalyzerWithStemmer":{
 "tokenizer":"standard",
@@ -50,7 +50,7 @@ index :
 char_filter : [my_mapping]
 custom6 :
 tokenizer : standard
-position_offset_gap: 256
+position_increment_gap: 256
 custom7 :
 type : standard
 version: 3.6
@@ -32,6 +32,7 @@ import org.elasticsearch.common.xcontent.XContentBuilder;
 import org.elasticsearch.common.xcontent.XContentFactory;
 import org.elasticsearch.common.xcontent.XContentParser;
 import org.elasticsearch.common.xcontent.json.JsonXContent;
+import org.elasticsearch.cluster.metadata.IndexMetaData;
 import org.elasticsearch.index.IndexService;
 import org.elasticsearch.index.fielddata.FieldDataType;
 import org.elasticsearch.index.mapper.ContentPath;
@@ -43,7 +44,12 @@ import org.elasticsearch.index.mapper.MergeResult;
 import org.elasticsearch.index.mapper.ParseContext.Document;
 import org.elasticsearch.index.mapper.ParsedDocument;
 import org.elasticsearch.index.mapper.core.StringFieldMapper;
+import org.elasticsearch.index.mapper.MapperParsingException;
+import org.elasticsearch.Version;
 import org.elasticsearch.test.ESSingleNodeTestCase;
+import org.elasticsearch.test.VersionUtils;
+
+
 import org.junit.Before;
 import org.junit.Test;

@@ -54,6 +60,7 @@ import static org.elasticsearch.index.mapper.core.StringFieldMapper.Builder;
 import static org.hamcrest.Matchers.equalTo;
 import static org.hamcrest.Matchers.notNullValue;
 import static org.hamcrest.Matchers.nullValue;
+import static org.hamcrest.Matchers.containsString;

 /**
 */
@@ -222,22 +229,22 @@ public class SimpleStringMappingTests extends ESSingleNodeTestCase {
 .startObject("properties")
 .startObject("field1")
 .field("type", "string")
-.field("position_offset_gap", 1000)
+.field("position_increment_gap", 1000)
 .endObject()
 .startObject("field2")
 .field("type", "string")
-.field("position_offset_gap", 1000)
+.field("position_increment_gap", 1000)
 .field("analyzer", "standard")
 .endObject()
 .startObject("field3")
 .field("type", "string")
-.field("position_offset_gap", 1000)
+.field("position_increment_gap", 1000)
 .field("analyzer", "standard")
 .field("search_analyzer", "simple")
 .endObject()
 .startObject("field4")
 .field("type", "string")
-.field("position_offset_gap", 1000)
+.field("position_increment_gap", 1000)
 .field("analyzer", "standard")
 .field("search_analyzer", "simple")
 .field("search_quote_analyzer", "simple")
@@ -256,12 +263,12 @@ public class SimpleStringMappingTests extends ESSingleNodeTestCase {
 .startObject("properties")
 .startObject("field1")
 .field("type", "string")
-.field("position_offset_gap", 1000)
+.field("position_increment_gap", 1000)
 .field("search_quote_analyzer", "simple")
 .endObject()
 .startObject("field2")
 .field("type", "string")
-.field("position_offset_gap", 1000)
+.field("position_increment_gap", 1000)
 .field("analyzer", "standard")
 .field("search_analyzer", "standard")
 .field("search_quote_analyzer", "simple")
@@ -518,4 +525,48 @@ public class SimpleStringMappingTests extends ESSingleNodeTestCase {
 assertTrue(mergeResult.buildConflicts()[0].contains("cannot enable norms"));
 }

+/**
+ * Test that expected exceptions are thrown when creating a new index with position_offset_gap
+ */
+public void testPositionOffsetGapDeprecation() throws Exception {
+// test deprecation exceptions on newly created indexes
+String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
+.startObject("properties")
+.startObject("field1")
+.field("type", "string")
+.field("position_increment_gap", 10)
+.endObject()
+.startObject("field2")
+.field("type", "string")
+.field("position_offset_gap", 50)
+.field("analyzer", "standard")
+.endObject().endObject().endObject().endObject().string();
+try {
+parser.parse(mapping);
+fail("Mapping definition should fail with the position_offset_gap setting");
+}catch (MapperParsingException e) {
+assertEquals(e.getMessage(), "Mapping definition for [field2] has unsupported parameters: [position_offset_gap : 50]");
+}
+}
+
+/**
+ * Test backward compatibility
+ */
+public void testBackwardCompatible() throws Exception {
+
+Settings settings = Settings.settingsBuilder().put(IndexMetaData.SETTING_VERSION_CREATED, VersionUtils.randomVersionBetween(random(), Version.V_1_0_0,
+Version.V_1_7_1)).build();
+
+DocumentMapperParser parser = createIndex("backward_compatible_index", settings).mapperService().documentMapperParser();
+
+String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
+.startObject("properties")
+.startObject("field1")
+.field("type", "string")
+.field("position_offset_gap", 10)
+.endObject().endObject().endObject().endObject().string();
+parser.parse(mapping);
+
+assertThat(parser.parse(mapping).mapping().toString(), containsString("\"position_increment_gap\":10"));
+}
 }
@@ -35,12 +35,12 @@ import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitC
 import static org.hamcrest.Matchers.containsString;

 /**
- * Tests that position_offset_gap is read from the mapper and applies as
+ * Tests that position_increment_gap is read from the mapper and applies as
 * expected in queries.
 */
-public class StringFieldMapperPositionOffsetGapTests extends ESSingleNodeTestCase {
+public class StringFieldMapperPositionIncrementGapTests extends ESSingleNodeTestCase {
 /**
- * The default position_offset_gap should be large enough that most
+ * The default position_increment_gap should be large enough that most
 * "sensible" queries phrase slops won't match across values.
 */
 public void testDefault() throws IOException {
@@ -53,7 +53,7 @@ public class StringFieldMapperPositionOffsetGapTests extends ESSingleNodeTestCas
 public static void assertGapIsOneHundred(Client client, String indexName, String type) throws IOException {
 testGap(client(), indexName, type, 100);

-// No match across gap using default slop with default positionOffsetGap
+// No match across gap using default slop with default positionIncrementGap
 assertHitCount(client.prepareSearch(indexName).setQuery(matchPhraseQuery("string", "one two")).get(), 0);

 // Nor with small-ish values
@@ -77,7 +77,7 @@ public class StringFieldMapperPositionOffsetGapTests extends ESSingleNodeTestCas
 testGap(client, indexName, type, 0);
 /*
 * Phrases match across different values using default slop with pre-2.0 default
- * position_offset_gap.
+ * position_increment_gap.
 */
 assertHitCount(client.prepareSearch(indexName).setQuery(matchPhraseQuery("string", "one two")).get(), 1);
 }
@@ -97,12 +97,12 @@ public class StringFieldMapperPositionOffsetGapTests extends ESSingleNodeTestCas
 setupGapInMapping(-1);
 fail("Expected an error");
 } catch (MapperParsingException e) {
-assertThat(ExceptionsHelper.detailedMessage(e), containsString("positions_offset_gap less than 0 aren't allowed"));
+assertThat(ExceptionsHelper.detailedMessage(e), containsString("positions_increment_gap less than 0 aren't allowed"));
 }
 }

 /**
- * Tests that the default actually defaults to the position_offset_gap
+ * Tests that the default actually defaults to the position_increment_gap
 * configured in the analyzer. This behavior is very old and a little
 * strange but not worth breaking some thought.
 */
@@ -111,26 +111,26 @@ public class StringFieldMapperPositionOffsetGapTests extends ESSingleNodeTestCas
 .startObject("gappy");
 settings.field("type", "custom");
 settings.field("tokenizer", "standard");
-settings.field("position_offset_gap", 2);
+settings.field("position_increment_gap", 2);
 setupAnalyzer(settings, "gappy");
 testGap(client(), "test", "test", 2);
 }

 /**
 * Build an index named "test" with a field named "string" with the provided
- * positionOffsetGap that uses the standard analyzer.
+ * positionIncrementGap that uses the standard analyzer.
 */
-private void setupGapInMapping(int positionOffsetGap) throws IOException {
+private void setupGapInMapping(int positionIncrementGap) throws IOException {
 XContentBuilder mapping = XContentFactory.jsonBuilder().startObject().startObject("properties").startObject("string");
 mapping.field("type", "string");
-mapping.field("position_offset_gap", positionOffsetGap);
+mapping.field("position_increment_gap", positionIncrementGap);
 client().admin().indices().prepareCreate("test").addMapping("test", mapping).get();
 }

 /**
 * Build an index named "test" with the provided settings and and a field
 * named "string" that uses the specified analyzer and default
- * position_offset_gap.
+ * position_increment_gap.
 */
 private void setupAnalyzer(XContentBuilder settings, String analyzer) throws IOException {
 XContentBuilder mapping = XContentFactory.jsonBuilder().startObject().startObject("properties").startObject("string");
@@ -139,20 +139,20 @@ public class StringFieldMapperPositionOffsetGapTests extends ESSingleNodeTestCas
 client().admin().indices().prepareCreate("test").addMapping("test", mapping).setSettings(settings).get();
 }

-private static void testGap(Client client, String indexName, String type, int positionOffsetGap) throws IOException {
+private static void testGap(Client client, String indexName, String type, int positionIncrementGap) throws IOException {
 client.prepareIndex(indexName, type, "position_gap_test").setSource("string", ImmutableList.of("one", "two three")).setRefresh(true).get();

 // Baseline - phrase query finds matches in the same field value
 assertHitCount(client.prepareSearch(indexName).setQuery(matchPhraseQuery("string", "two three")).get(), 1);

-if (positionOffsetGap > 0) {
+if (positionIncrementGap > 0) {
 // No match across gaps when slop < position gap
-assertHitCount(client.prepareSearch(indexName).setQuery(matchPhraseQuery("string", "one two").slop(positionOffsetGap - 1)).get(),
+assertHitCount(client.prepareSearch(indexName).setQuery(matchPhraseQuery("string", "one two").slop(positionIncrementGap - 1)).get(),
 0);
 }

 // Match across gaps when slop >= position gap
-assertHitCount(client.prepareSearch(indexName).setQuery(matchPhraseQuery("string", "one two").slop(positionOffsetGap)).get(), 1);
-assertHitCount(client.prepareSearch(indexName).setQuery(matchPhraseQuery("string", "one two").slop(positionOffsetGap + 1)).get(), 1);
+assertHitCount(client.prepareSearch(indexName).setQuery(matchPhraseQuery("string", "one two").slop(positionIncrementGap)).get(), 1);
+assertHitCount(client.prepareSearch(indexName).setQuery(matchPhraseQuery("string", "one two").slop(positionIncrementGap + 1)).get(), 1);
 }
 }
@@ -253,7 +253,7 @@ public class AnalyzeActionIT extends ESIntegTestCase {
 ensureGreen();

 client().admin().indices().preparePutMapping("test")
-.setType("document").setSource("simple", "type=string,analyzer=simple,position_offset_gap=100").get();
+.setType("document").setSource("simple", "type=string,analyzer=simple,position_increment_gap=100").get();

 String[] texts = new String[]{"THIS IS A TEST", "THE SECOND TEXT"};

@@ -20,7 +20,7 @@ filters.
 |`char_filter` |An optional list of logical / registered name of char
 filters.

-|`position_offset_gap` |An optional number of positions to increment
+|`position_increment_gap` |An optional number of positions to increment
 between each field value of a field using this analyzer. Defaults to 100.
 100 was chosen because it prevents phrase queries with reasonably large
 slops (less than 100) from matching terms across field values.
@@ -38,7 +38,7 @@ index :
 tokenizer : myTokenizer1
 filter : [myTokenFilter1, myTokenFilter2]
 char_filter : [my_html]
-position_offset_gap: 256
+position_increment_gap: 256
 tokenizer :
 myTokenizer1 :
 type : standard
@@ -28,7 +28,7 @@ The following mapping parameters are common to some or all field datatypes:
 * <<multi-fields,`fields`>>
 * <<norms,`norms`>>
 * <<null-value,`null_value`>>
-* <<position-offset-gap,`position_offset_gap`>>
+* <<position-increment-gap,`position_increment_gap`>>
 * <<properties,`properties`>>
 * <<search-analyzer,`search_analyzer`>>
 * <<similarity,`similarity`>>
@@ -78,7 +78,7 @@ include::params/norms.asciidoc[]

 include::params/null-value.asciidoc[]

-include::params/position-offset-gap.asciidoc[]
+include::params/position-increment-gap.asciidoc[]

 include::params/precision-step.asciidoc[]

@@ -1,5 +1,5 @@
-[[position-offset-gap]]
-=== `position_offset_gap`
+[[position-increment-gap]]
+=== `position_increment_gap`

 <<mapping-index,Analyzed>> string fields take term <<index-options,positions>>
 into account, in order to be able to support
@@ -30,7 +30,7 @@ GET /my_index/groups/_search
 // AUTOSENSE
 <1> This phrase query matches our document, even though `Abraham` and `Lincoln` are in separate strings.

-The `position_offset_gap` can introduce a fake gap between each array element. For instance:
+The `position_increment_gap` can introduce a fake gap between each array element. For instance:

 [source,js]
 --------------------------------------------------
@@ -41,7 +41,7 @@ PUT my_index
 "properties": {
 "names": {
 "type": "string",
-"position_offset_gap": 50 <1>
+"position_increment_gap": 50 <1>
 }
 }
 }
@@ -67,7 +67,7 @@ GET /my_index/groups/_search
 last term in the previous array element.
 <2> The phrase query no longer matches our document.

-TIP: The `position_offset_gap` setting is allowed to have different settings
+TIP: The `position_increment_gap` setting is allowed to have different settings
 for fields of the same name in the same index. Its value can be updated on
 existing fields using the <<indices-put-mapping,PUT mapping API>>.
@@ -141,12 +141,12 @@ Defaults depend on the <<mapping-index,`index`>> setting:
 values. Defaults to `null`, which means the field is treated as missing.
 If the field is `analyzed`, the `null_value` will also be analyzed.

-<<position-offset-gap,`position_offset_gap`>>::
+<<position-increment-gap,`position_increment_gap`>>::

 The number of fake term positions which should be inserted between
 each element of an array of strings. Defaults to 0.
 The number of fake term position which should be inserted between each
-element of an array of strings. Defaults to the position_offset_gap
+element of an array of strings. Defaults to the position_increment_gap
 configured on the analyzer which defaults to 100. 100 was chosen because it
 prevents phrase queries with reasonably large slops (less than 100) from
 matching terms across field values.
@@ -385,9 +385,14 @@ default. If you would like to increase compression levels, use the new
 <<index-codec,`index.codec: best_compression`>> setting instead.

 ==== position_offset_gap
-The default `position_offset_gap` is now 100. Indexes created in Elasticsearch
+
+The `position_offset_gap` option is renamed to 'position_increment_gap'. This was
+done to clear away the confusion. Elasticsearch's 'position_increment_gap' now is
+mapped directly to Lucene's 'position_increment_gap'
+
+The default `position_increment_gap` is now 100. Indexes created in Elasticsearch
 2.0.0 will default to using 100 and indexes created before that will continue
 to use the old default of 0. This was done to prevent phrase queries from
 matching across different values of the same term unexpectedly. Specifically,
 100 was chosen to cause phrase queries with slops up to 99 to match only within
-a single value of a field.
\ No newline at end of file
+a single value of a field.
@@ -24,4 +24,4 @@ GET /my_index/_search?scroll=2m

 Scroll requests sorted by `_doc` have been optimized to more efficiently resume
 from where the previous request stopped, so this will have the same performance
-characteristics as the former `scan` search type.
\ No newline at end of file
+characteristics as the former `scan` search type.
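To summarize the user-facing effect of the rename, here is a minimal sketch of the setting under its new name in both places it applies, built with the same APIs the tests above use (Settings.settingsBuilder() and XContentFactory.jsonBuilder()). It is not part of the commit; the index, analyzer, and field names are hypothetical.

import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;

// Hypothetical example of the renamed setting at the analyzer and field level.
public class PositionIncrementGapExample {

    public static void main(String[] args) throws Exception {
        // Analyzer-level setting, formerly position_offset_gap.
        Settings analysisSettings = Settings.settingsBuilder()
                .put("index.analysis.analyzer.my_analyzer.type", "custom")
                .put("index.analysis.analyzer.my_analyzer.tokenizer", "standard")
                .put("index.analysis.analyzer.my_analyzer.position_increment_gap", "256")
                .build();

        // Field-level mapping parameter, also renamed to position_increment_gap.
        XContentBuilder mapping = XContentFactory.jsonBuilder().startObject()
                .startObject("properties")
                    .startObject("names")
                        .field("type", "string")
                        .field("position_increment_gap", 50)
                    .endObject()
                .endObject()
            .endObject();

        System.out.println(analysisSettings.getAsMap());
        System.out.println(mapping.string());
    }
}

As the CustomAnalyzerProvider and StringFieldMapper changes above show, indices created before 2.0 still accept position_offset_gap for backwards compatibility, while newer indices reject it with an error that points at position_increment_gap.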