Use Lucene Version that was used to create the index in Analysis

Lucene ships with a version constant that is mainly used to provide consistent behaviour across lucene release versions. Lucene's Analysis capabilities are commonly applied at index and search time such that the search-time behaviour should be identical to the index-time behaviour in most of the cases. Currently ElasticSearch always uses the latest version from Lucene which can break backwards compatibility with the index for users that rely on behaviour that changed in new Lucene version.

Users should always use the version the index was created with unless it's explicitly configured.

closes #2945
This commit is contained in:
Shay Banon 2013-04-29 13:18:51 +02:00
parent bd7ff6946e
commit 9ded2405a0
8 changed files with 81 additions and 64 deletions

View File

@ -22,6 +22,7 @@ package org.elasticsearch;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.monitor.jvm.JvmInfo;
import java.io.IOException;
@ -36,89 +37,89 @@ public class Version implements Serializable {
// the (internal) format of the id is there so we can easily do after/before checks on the id
public static final int V_0_18_0_ID = /*00*/180099;
public static final Version V_0_18_0 = new Version(V_0_18_0_ID, false);
public static final Version V_0_18_0 = new Version(V_0_18_0_ID, false, org.apache.lucene.util.Version.LUCENE_36);
public static final int V_0_18_1_ID = /*00*/180199;
public static final Version V_0_18_1 = new Version(V_0_18_1_ID, false);
public static final Version V_0_18_1 = new Version(V_0_18_1_ID, false, org.apache.lucene.util.Version.LUCENE_36);
public static final int V_0_18_2_ID = /*00*/180299;
public static final Version V_0_18_2 = new Version(V_0_18_2_ID, false);
public static final Version V_0_18_2 = new Version(V_0_18_2_ID, false, org.apache.lucene.util.Version.LUCENE_36);
public static final int V_0_18_3_ID = /*00*/180399;
public static final Version V_0_18_3 = new Version(V_0_18_3_ID, false);
public static final Version V_0_18_3 = new Version(V_0_18_3_ID, false, org.apache.lucene.util.Version.LUCENE_36);
public static final int V_0_18_4_ID = /*00*/180499;
public static final Version V_0_18_4 = new Version(V_0_18_4_ID, false);
public static final Version V_0_18_4 = new Version(V_0_18_4_ID, false, org.apache.lucene.util.Version.LUCENE_36);
public static final int V_0_18_5_ID = /*00*/180599;
public static final Version V_0_18_5 = new Version(V_0_18_5_ID, false);
public static final Version V_0_18_5 = new Version(V_0_18_5_ID, false, org.apache.lucene.util.Version.LUCENE_36);
public static final int V_0_18_6_ID = /*00*/180699;
public static final Version V_0_18_6 = new Version(V_0_18_6_ID, false);
public static final Version V_0_18_6 = new Version(V_0_18_6_ID, false, org.apache.lucene.util.Version.LUCENE_36);
public static final int V_0_18_7_ID = /*00*/180799;
public static final Version V_0_18_7 = new Version(V_0_18_7_ID, false);
public static final Version V_0_18_7 = new Version(V_0_18_7_ID, false, org.apache.lucene.util.Version.LUCENE_36);
public static final int V_0_18_8_ID = /*00*/180899;
public static final Version V_0_18_8 = new Version(V_0_18_8_ID, false);
public static final Version V_0_18_8 = new Version(V_0_18_8_ID, false, org.apache.lucene.util.Version.LUCENE_36);
public static final int V_0_19_0_RC1_ID = /*00*/190051;
public static final Version V_0_19_0_RC1 = new Version(V_0_19_0_RC1_ID, false);
public static final Version V_0_19_0_RC1 = new Version(V_0_19_0_RC1_ID, false, org.apache.lucene.util.Version.LUCENE_36);
public static final int V_0_19_0_RC2_ID = /*00*/190052;
public static final Version V_0_19_0_RC2 = new Version(V_0_19_0_RC2_ID, false);
public static final Version V_0_19_0_RC2 = new Version(V_0_19_0_RC2_ID, false, org.apache.lucene.util.Version.LUCENE_36);
public static final int V_0_19_0_RC3_ID = /*00*/190053;
public static final Version V_0_19_0_RC3 = new Version(V_0_19_0_RC3_ID, false);
public static final Version V_0_19_0_RC3 = new Version(V_0_19_0_RC3_ID, false, org.apache.lucene.util.Version.LUCENE_36);
public static final int V_0_19_0_ID = /*00*/190099;
public static final Version V_0_19_0 = new Version(V_0_19_0_ID, false);
public static final Version V_0_19_0 = new Version(V_0_19_0_ID, false, org.apache.lucene.util.Version.LUCENE_36);
public static final int V_0_19_1_ID = /*00*/190199;
public static final Version V_0_19_1 = new Version(V_0_19_1_ID, false);
public static final Version V_0_19_1 = new Version(V_0_19_1_ID, false, org.apache.lucene.util.Version.LUCENE_36);
public static final int V_0_19_2_ID = /*00*/190299;
public static final Version V_0_19_2 = new Version(V_0_19_2_ID, false);
public static final Version V_0_19_2 = new Version(V_0_19_2_ID, false, org.apache.lucene.util.Version.LUCENE_36);
public static final int V_0_19_3_ID = /*00*/190399;
public static final Version V_0_19_3 = new Version(V_0_19_3_ID, false);
public static final Version V_0_19_3 = new Version(V_0_19_3_ID, false, org.apache.lucene.util.Version.LUCENE_36);
public static final int V_0_19_4_ID = /*00*/190499;
public static final Version V_0_19_4 = new Version(V_0_19_4_ID, false);
public static final Version V_0_19_4 = new Version(V_0_19_4_ID, false, org.apache.lucene.util.Version.LUCENE_36);
public static final int V_0_19_5_ID = /*00*/190599;
public static final Version V_0_19_5 = new Version(V_0_19_5_ID, false);
public static final Version V_0_19_5 = new Version(V_0_19_5_ID, false, org.apache.lucene.util.Version.LUCENE_36);
public static final int V_0_19_6_ID = /*00*/190699;
public static final Version V_0_19_6 = new Version(V_0_19_6_ID, false);
public static final Version V_0_19_6 = new Version(V_0_19_6_ID, false, org.apache.lucene.util.Version.LUCENE_36);
public static final int V_0_19_7_ID = /*00*/190799;
public static final Version V_0_19_7 = new Version(V_0_19_7_ID, false);
public static final Version V_0_19_7 = new Version(V_0_19_7_ID, false, org.apache.lucene.util.Version.LUCENE_36);
public static final int V_0_19_8_ID = /*00*/190899;
public static final Version V_0_19_8 = new Version(V_0_19_8_ID, false);
public static final Version V_0_19_8 = new Version(V_0_19_8_ID, false, org.apache.lucene.util.Version.LUCENE_36);
public static final int V_0_19_9_ID = /*00*/190999;
public static final Version V_0_19_9 = new Version(V_0_19_9_ID, false);
public static final Version V_0_19_9 = new Version(V_0_19_9_ID, false, org.apache.lucene.util.Version.LUCENE_36);
public static final int V_0_19_10_ID = /*00*/191099;
public static final Version V_0_19_10 = new Version(V_0_19_10_ID, false);
public static final Version V_0_19_10 = new Version(V_0_19_10_ID, false, org.apache.lucene.util.Version.LUCENE_36);
public static final int V_0_19_11_ID = /*00*/191199;
public static final Version V_0_19_11 = new Version(V_0_19_11_ID, false);
public static final Version V_0_19_11 = new Version(V_0_19_11_ID, false, org.apache.lucene.util.Version.LUCENE_36);
public static final int V_0_19_12_ID = /*00*/191299;
public static final Version V_0_19_12 = new Version(V_0_19_12_ID, false);
public static final Version V_0_19_12 = new Version(V_0_19_12_ID, false, org.apache.lucene.util.Version.LUCENE_36);
public static final int V_0_19_13_ID = /*00*/191399;
public static final Version V_0_19_13 = new Version(V_0_19_13_ID, false);
public static final Version V_0_19_13 = new Version(V_0_19_13_ID, false, org.apache.lucene.util.Version.LUCENE_36);
public static final int V_0_20_0_RC1_ID = /*00*/200051;
public static final Version V_0_20_0_RC1 = new Version(V_0_20_0_RC1_ID, false);
public static final Version V_0_20_0_RC1 = new Version(V_0_20_0_RC1_ID, false, org.apache.lucene.util.Version.LUCENE_36);
public static final int V_0_20_0_ID = /*00*/200099;
public static final Version V_0_20_0 = new Version(V_0_20_0_ID, false);
public static final Version V_0_20_0 = new Version(V_0_20_0_ID, false, org.apache.lucene.util.Version.LUCENE_36);
public static final int V_0_20_1_ID = /*00*/200199;
public static final Version V_0_20_1 = new Version(V_0_20_1_ID, false);
public static final Version V_0_20_1 = new Version(V_0_20_1_ID, false, org.apache.lucene.util.Version.LUCENE_36);
public static final int V_0_20_2_ID = /*00*/200299;
public static final Version V_0_20_2 = new Version(V_0_20_2_ID, false);
public static final Version V_0_20_2 = new Version(V_0_20_2_ID, false, org.apache.lucene.util.Version.LUCENE_36);
public static final int V_0_20_3_ID = /*00*/200399;
public static final Version V_0_20_3 = new Version(V_0_20_3_ID, false);
public static final Version V_0_20_3 = new Version(V_0_20_3_ID, false, org.apache.lucene.util.Version.LUCENE_36);
public static final int V_0_20_4_ID = /*00*/200499;
public static final Version V_0_20_4 = new Version(V_0_20_4_ID, false);
public static final Version V_0_20_4 = new Version(V_0_20_4_ID, false, org.apache.lucene.util.Version.LUCENE_36);
public static final int V_0_20_5_ID = /*00*/200599;
public static final Version V_0_20_5 = new Version(V_0_20_5_ID, false);
public static final Version V_0_20_5 = new Version(V_0_20_5_ID, false, org.apache.lucene.util.Version.LUCENE_36);
public static final int V_0_20_6_ID = /*00*/200699;
public static final Version V_0_20_6 = new Version(V_0_20_6_ID, false);
public static final Version V_0_20_6 = new Version(V_0_20_6_ID, false, org.apache.lucene.util.Version.LUCENE_36);
public static final int V_0_20_7_ID = /*00*/200799;
public static final Version V_0_20_7 = new Version(V_0_20_7_ID, false);
public static final Version V_0_20_7 = new Version(V_0_20_7_ID, false, org.apache.lucene.util.Version.LUCENE_36);
public static final int V_0_90_0_Beta1_ID = /*00*/900001;
public static final Version V_0_90_0_Beta1 = new Version(V_0_90_0_Beta1_ID, false);
public static final Version V_0_90_0_Beta1 = new Version(V_0_90_0_Beta1_ID, false, org.apache.lucene.util.Version.LUCENE_41);
public static final int V_0_90_0_RC1_ID = /*00*/900051;
public static final Version V_0_90_0_RC1 = new Version(V_0_90_0_RC1_ID, false);
public static final Version V_0_90_0_RC1 = new Version(V_0_90_0_RC1_ID, false, org.apache.lucene.util.Version.LUCENE_41);
public static final int V_0_90_0_RC2_ID = /*00*/900052;
public static final Version V_0_90_0_RC2 = new Version(V_0_90_0_RC2_ID, false);
public static final Version V_0_90_0_RC2 = new Version(V_0_90_0_RC2_ID, false, org.apache.lucene.util.Version.LUCENE_42);
public static final int V_0_90_0_RC3_ID = /*00*/900053;
public static final Version V_0_90_0_RC3 = new Version(V_0_90_0_RC3_ID, true);
public static final Version V_0_90_0_RC3 = new Version(V_0_90_0_RC3_ID, true, org.apache.lucene.util.Version.LUCENE_42);
public static final Version CURRENT = V_0_90_0_RC3;
@ -211,7 +212,7 @@ public class Version implements Serializable {
return V_0_18_8;
default:
return new Version(id, null);
return new Version(id, null, Lucene.VERSION);
}
}
@ -225,14 +226,16 @@ public class Version implements Serializable {
public final byte revision;
public final byte build;
public final Boolean snapshot;
public final org.apache.lucene.util.Version luceneVersion;
Version(int id, @Nullable Boolean snapshot) {
Version(int id, @Nullable Boolean snapshot, org.apache.lucene.util.Version luceneVersion) {
this.id = id;
this.major = (byte) ((id / 1000000) % 100);
this.minor = (byte) ((id / 10000) % 100);
this.revision = (byte) ((id / 100) % 100);
this.build = (byte) (id % 100);
this.snapshot = snapshot;
this.luceneVersion = luceneVersion;
}
public boolean snapshot() {

View File

@ -22,7 +22,6 @@ package org.elasticsearch.common.lucene;
import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.*;
import org.apache.lucene.index.SegmentInfos.FindSegmentsFile;
import org.apache.lucene.search.*;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
@ -35,7 +34,6 @@ import org.elasticsearch.index.analysis.AnalyzerScope;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.fielddata.IndexFieldData;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.lang.reflect.Field;

View File

@ -21,7 +21,6 @@ package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.util.Version;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.AbstractIndexComponent;
import org.elasticsearch.index.Index;
@ -46,7 +45,7 @@ public abstract class AbstractIndexAnalyzerProvider<T extends Analyzer> extends
public AbstractIndexAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, String name, Settings settings) {
super(index, indexSettings);
this.name = name;
this.version = Lucene.parseVersion(settings.get("version"), Lucene.ANALYZER_VERSION, logger);
this.version = Analysis.parseAnalysisVersion(indexSettings, settings, logger);
}
/**
@ -60,7 +59,7 @@ public abstract class AbstractIndexAnalyzerProvider<T extends Analyzer> extends
public AbstractIndexAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, String prefixSettings, String name, Settings settings) {
super(index, indexSettings, prefixSettings);
this.name = name;
this.version = Lucene.parseVersion(settings.get("version"), Lucene.ANALYZER_VERSION, logger);
this.version = Analysis.parseAnalysisVersion(indexSettings, settings, logger);
}
/**

View File

@ -20,7 +20,6 @@
package org.elasticsearch.index.analysis;
import org.apache.lucene.util.Version;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.AbstractIndexComponent;
import org.elasticsearch.index.Index;
@ -38,7 +37,7 @@ public abstract class AbstractTokenFilterFactory extends AbstractIndexComponent
public AbstractTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, String name, Settings settings) {
super(index, indexSettings);
this.name = name;
this.version = Lucene.parseVersion(settings.get("version"), Lucene.ANALYZER_VERSION, logger);
this.version = Analysis.parseAnalysisVersion(indexSettings, settings, logger);
}
@Override

View File

@ -20,7 +20,6 @@
package org.elasticsearch.index.analysis;
import org.apache.lucene.util.Version;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.AbstractIndexComponent;
import org.elasticsearch.index.Index;
@ -38,7 +37,7 @@ public abstract class AbstractTokenizerFactory extends AbstractIndexComponent im
public AbstractTokenizerFactory(Index index, @IndexSettings Settings indexSettings, String name, Settings settings) {
super(index, indexSettings);
this.name = name;
this.version = Lucene.parseVersion(settings.get("version"), Lucene.ANALYZER_VERSION, logger);
this.version = Analysis.parseAnalysisVersion(indexSettings, settings, logger);
}
@Override

View File

@ -52,10 +52,14 @@ import org.apache.lucene.analysis.tr.TurkishAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.util.Version;
import org.elasticsearch.ElasticSearchIllegalArgumentException;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.collect.MapBuilder;
import org.elasticsearch.common.logging.ESLogger;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.settings.IndexSettings;
import java.io.BufferedReader;
import java.io.IOException;
@ -69,6 +73,21 @@ import java.util.*;
*/
public class Analysis {
public static Version parseAnalysisVersion(@IndexSettings Settings indexSettings, Settings settings, ESLogger logger) {
// check for explicit version on the specific analyzer component
String sVersion = settings.get("version");
if (sVersion != null) {
return Lucene.parseVersion(sVersion, Lucene.ANALYZER_VERSION, logger);
}
// check for explicit version on the index itself as default for all analysis components
sVersion = indexSettings.get("index.analysis.version");
if (sVersion != null) {
return Lucene.parseVersion(sVersion, Lucene.ANALYZER_VERSION, logger);
}
// resolve the analysis version based on the version the index was created with
return indexSettings.getAsVersion(IndexMetaData.SETTING_VERSION_CREATED, org.elasticsearch.Version.CURRENT).luceneVersion;
}
public static boolean isNoStopwords(Settings settings) {
String value = settings.get("stopwords");
return value != null && "_none_".equals(value);

View File

@ -44,20 +44,20 @@ import java.util.Map;
* the {@code <path.conf>/hunspell} directory, where each locale has its dedicated sub-directory which holds the dictionary
* files. For example, the dictionary files for {@code en_US} locale must be placed under {@code <path.conf>/hunspell/en_US}
* directory.
*
* <p/>
* The following settings can be set for each dictionary:
* <ul>
* <li>{@code ignore_case} - If true, dictionary matching will be case insensitive (defaults to {@code false})</li>
* <li>{@code strict_affix_parsing} - Determines whether errors while reading a affix rules file will cause exception or simple be ignored (defaults to {@code true})</li>
* </ul>
*
* <p/>
* These settings can either be configured as node level configuration, such as:
* <br/><br/>
* <pre><code>
* indices.analysis.hunspell.dictionary.en_US.ignore_case: true
* indices.analysis.hunspell.dictionary.en_US.strict_affix_parsing: false
* </code></pre>
*
* <p/>
* or, as dedicated configuration per dictionary, placed in a {@code settings.yml} file under the dictionary directory. For
* example, the following can be the content of the {@code <path.config>/hunspell/en_US/settings.yml} file:
* <br/><br/>
@ -91,7 +91,7 @@ public class HunspellService extends AbstractComponent {
this.hunspellDir = resolveHunspellDirectory(settings, env);
this.defaultIgnoreCase = settings.getAsBoolean("indices.analysis.hunspell.dictionary.ignore_case", false);
this.defaultStrictAffixParsing = settings.getAsBoolean("indices.analysis.hunspell.dictionary.strict_affix_parsing", false);
final Version version = Lucene.parseVersion(settings.get("version"), Lucene.ANALYZER_VERSION, logger);
final Version version = Lucene.parseVersion(settings.get("indices.analysis.hunspell.version"), Lucene.ANALYZER_VERSION, logger);
dictionaries = CacheBuilder.newBuilder().build(new CacheLoader<String, HunspellDictionary>() {
@Override
public HunspellDictionary load(String locale) throws Exception {

View File

@ -58,7 +58,7 @@ public class HunspellServiceTests extends AbstractNodesTests {
HunspellDictionary dictionary = ((InternalNode) node).injector().getInstance(HunspellService.class).getDictionary("en_US");
assertThat(dictionary, notNullValue());
Version expectedVersion = Lucene.parseVersion(settings.get("version"), Lucene.ANALYZER_VERSION, logger);
Version expectedVersion = Lucene.parseVersion(settings.get("indices.analysis.hunspell.version"), Lucene.ANALYZER_VERSION, logger);
assertThat(dictionary.getVersion(), equalTo(expectedVersion));
assertThat(dictionary.isIgnoreCase(), equalTo(true));
}
@ -77,7 +77,7 @@ public class HunspellServiceTests extends AbstractNodesTests {
HunspellDictionary dictionary = ((InternalNode) node).injector().getInstance(HunspellService.class).getDictionary("en_US");
assertThat(dictionary, notNullValue());
Version expectedVersion = Lucene.parseVersion(settings.get("version"), Lucene.ANALYZER_VERSION, logger);
Version expectedVersion = Lucene.parseVersion(settings.get("indices.analysis.hunspell.version"), Lucene.ANALYZER_VERSION, logger);
assertThat(dictionary.getVersion(), equalTo(expectedVersion));
assertThat(dictionary.isIgnoreCase(), equalTo(false));