Upgrade to lucene 5.5.0-snapshot-4de5f1d

This commit is contained in:
Robert Muir 2016-02-02 22:53:39 -05:00
parent 99cf7de514
commit d5dc05f69e
57 changed files with 80 additions and 44 deletions

View File

@ -289,7 +289,7 @@ class BuildPlugin implements Plugin<Project> {
String luceneVersion = VersionProperties.lucene
if (luceneVersion.contains('-snapshot')) {
// extract the revision number from the version with a regex matcher
String revision = (luceneVersion =~ /\w+-snapshot-(\d+)/)[0][1]
String revision = (luceneVersion =~ /\w+-snapshot-([a-z0-9]+)/)[0][1]
repos.maven {
name 'lucene-snapshots'
url "http://s3.amazonaws.com/download.elasticsearch.org/lucenesnapshots/${revision}"

View File

@ -1,5 +1,5 @@
elasticsearch = 3.0.0-SNAPSHOT
lucene = 5.5.0-snapshot-1725675
lucene = 5.5.0-snapshot-4de5f1d
# optional dependencies
spatial4j = 0.5

View File

@ -63,7 +63,7 @@ public class AllField extends Field {
}
@Override
public TokenStream tokenStream(Analyzer analyzer, TokenStream previous) throws IOException {
public TokenStream tokenStream(Analyzer analyzer, TokenStream previous) {
try {
allEntries.reset(); // reset the all entries, just in case it was read already
if (allEntries.customBoost() && fieldType().indexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0) {

View File

@ -320,7 +320,7 @@ public class DoubleFieldMapper extends NumberFieldMapper {
}
@Override
public TokenStream tokenStream(Analyzer analyzer, TokenStream previous) throws IOException {
public TokenStream tokenStream(Analyzer analyzer, TokenStream previous) {
if (fieldType().indexOptions() != IndexOptions.NONE) {
return getCachedStream().setDoubleValue(number);
}

View File

@ -332,7 +332,7 @@ public class FloatFieldMapper extends NumberFieldMapper {
}
@Override
public TokenStream tokenStream(Analyzer analyzer, TokenStream previous) throws IOException {
public TokenStream tokenStream(Analyzer analyzer, TokenStream previous) {
if (fieldType().indexOptions() != IndexOptions.NONE) {
return getCachedStream().setFloatValue(number);
}

View File

@ -340,7 +340,7 @@ public class IntegerFieldMapper extends NumberFieldMapper {
}
@Override
public TokenStream tokenStream(Analyzer analyzer, TokenStream previous) throws IOException {
public TokenStream tokenStream(Analyzer analyzer, TokenStream previous) {
if (fieldType().indexOptions() != IndexOptions.NONE) {
return getCachedStream().setIntValue(number);
}

View File

@ -323,7 +323,7 @@ public class LongFieldMapper extends NumberFieldMapper {
}
@Override
public TokenStream tokenStream(Analyzer analyzer, TokenStream previous) throws IOException {
public TokenStream tokenStream(Analyzer analyzer, TokenStream previous) {
if (fieldType().indexOptions() != IndexOptions.NONE) {
return getCachedStream().setLongValue(number);
}

View File

@ -408,7 +408,7 @@ public abstract class NumberFieldMapper extends FieldMapper implements AllFieldM
}
@Override
public TokenStream tokenStream(Analyzer analyzer, TokenStream reuse) throws IOException {
public TokenStream tokenStream(Analyzer analyzer, TokenStream reuse) {
return null;
}

View File

@ -332,7 +332,7 @@ public class ShortFieldMapper extends NumberFieldMapper {
}
@Override
public TokenStream tokenStream(Analyzer analyzer, TokenStream previous) throws IOException {
public TokenStream tokenStream(Analyzer analyzer, TokenStream previous) {
if (fieldType().indexOptions() != IndexOptions.NONE) {
return getCachedStream().setIntValue(number);
}

View File

@ -20,30 +20,58 @@
package org.elasticsearch.index.similarity;
import org.apache.lucene.search.similarities.DFISimilarity;
import org.apache.lucene.search.similarities.Independence;
import org.apache.lucene.search.similarities.IndependenceChiSquared;
import org.apache.lucene.search.similarities.IndependenceSaturated;
import org.apache.lucene.search.similarities.IndependenceStandardized;
import org.apache.lucene.search.similarities.Similarity;
import org.elasticsearch.common.settings.Settings;
import java.util.HashMap;
import java.util.Map;
import static java.util.Collections.unmodifiableMap;
/**
* {@link SimilarityProvider} for the {@link DFISimilarity}.
* <p>
* Configuration options available:
* <ul>
* <li>independence_measure</li>
* <li>discount_overlaps</li>
* </ul>
* @see DFISimilarity For more information about configuration
*/
public class DFISimilarityProvider extends AbstractSimilarityProvider {
// Lookup table from the accepted "independence_measure" setting values to the
// corresponding Lucene Independence implementations (the "basic models" of
// divergence from independence).
private static final Map<String, Independence> INDEPENDENCE_MEASURES;
static {
Map<String, Independence> measures = new HashMap<>();
measures.put("standardized", new IndependenceStandardized());
measures.put("saturated", new IndependenceSaturated());
measures.put("chisquared", new IndependenceChiSquared());
// expose only an unmodifiable view of the populated table
INDEPENDENCE_MEASURES = unmodifiableMap(measures);
}
private final DFISimilarity similarity;
public DFISimilarityProvider(String name, Settings settings) {
super(name);
boolean discountOverlaps = settings.getAsBoolean("discount_overlaps", true);
this.similarity = new DFISimilarity();
Independence measure = parseIndependence(settings);
this.similarity = new DFISimilarity(measure);
this.similarity.setDiscountOverlaps(discountOverlaps);
}
/**
 * Resolves the {@code independence_measure} setting to an {@link Independence}
 * instance by looking it up in {@code INDEPENDENCE_MEASURES}.
 *
 * @param settings settings of the similarity being configured
 * @return the measure registered under the configured name
 * @throws IllegalArgumentException if no measure is registered under the configured name
 */
private Independence parseIndependence(Settings settings) {
    final String configured = settings.get("independence_measure");
    final Independence resolved = INDEPENDENCE_MEASURES.get(configured);
    if (resolved != null) {
        return resolved;
    }
    // no entry for the configured name: reject it, echoing the offending value
    throw new IllegalArgumentException("Unsupported IndependenceMeasure [" + configured + "]");
}
@Override
public Similarity get() {
return similarity;

View File

@ -52,8 +52,8 @@ import static java.util.Collections.unmodifiableMap;
* @see DFRSimilarity For more information about configuration
*/
public class DFRSimilarityProvider extends AbstractSimilarityProvider {
private static final Map<String, BasicModel> MODEL_CACHE;
private static final Map<String, AfterEffect> EFFECT_CACHE;
private static final Map<String, BasicModel> BASIC_MODELS;
private static final Map<String, AfterEffect> AFTER_EFFECTS;
static {
Map<String, BasicModel> models = new HashMap<>();
@ -64,13 +64,13 @@ public class DFRSimilarityProvider extends AbstractSimilarityProvider {
models.put("in", new BasicModelIn());
models.put("ine", new BasicModelIne());
models.put("p", new BasicModelP());
MODEL_CACHE = unmodifiableMap(models);
BASIC_MODELS = unmodifiableMap(models);
Map<String, AfterEffect> effects = new HashMap<>();
effects.put("no", new AfterEffect.NoAfterEffect());
effects.put("b", new AfterEffectB());
effects.put("l", new AfterEffectL());
EFFECT_CACHE = unmodifiableMap(effects);
AFTER_EFFECTS = unmodifiableMap(effects);
}
private final DFRSimilarity similarity;
@ -91,7 +91,7 @@ public class DFRSimilarityProvider extends AbstractSimilarityProvider {
*/
protected BasicModel parseBasicModel(Settings settings) {
String basicModel = settings.get("basic_model");
BasicModel model = MODEL_CACHE.get(basicModel);
BasicModel model = BASIC_MODELS.get(basicModel);
if (model == null) {
throw new IllegalArgumentException("Unsupported BasicModel [" + basicModel + "]");
}
@ -106,7 +106,7 @@ public class DFRSimilarityProvider extends AbstractSimilarityProvider {
*/
protected AfterEffect parseAfterEffect(Settings settings) {
String afterEffect = settings.get("after_effect");
AfterEffect effect = EFFECT_CACHE.get(afterEffect);
AfterEffect effect = AFTER_EFFECTS.get(afterEffect);
if (effect == null) {
throw new IllegalArgumentException("Unsupported AfterEffect [" + afterEffect + "]");
}

View File

@ -31,8 +31,8 @@ grant codeBase "${codebase.securesm-1.0.jar}" {
//// Very special jar permissions:
//// These are dangerous permissions that we don't want to grant to everything.
grant codeBase "${codebase.lucene-core-5.5.0-snapshot-1725675.jar}" {
// needed to allow MMapDirectory's "unmap hack"
grant codeBase "${codebase.lucene-core-5.5.0-snapshot-4de5f1d.jar}" {
// needed to allow MMapDirectory's "unmap hack" (die unmap hack, die)
permission java.lang.RuntimePermission "accessClassInPackage.sun.misc";
permission java.lang.reflect.ReflectPermission "suppressAccessChecks";
// NOTE: also needed for RAMUsageEstimator size calculations

View File

@ -31,7 +31,7 @@ grant codeBase "${codebase.securemock-1.2.jar}" {
permission java.lang.reflect.ReflectPermission "suppressAccessChecks";
};
grant codeBase "${codebase.lucene-test-framework-5.5.0-snapshot-1725675.jar}" {
grant codeBase "${codebase.lucene-test-framework-5.5.0-snapshot-4de5f1d.jar}" {
// needed by RamUsageTester
permission java.lang.reflect.ReflectPermission "suppressAccessChecks";
};

View File

@ -209,7 +209,7 @@ public class VersionsTests extends ESTestCase {
this.version = version;
}
@Override
public TokenStream tokenStream(Analyzer analyzer, TokenStream reuse) throws IOException {
public TokenStream tokenStream(Analyzer analyzer, TokenStream reuse) {
return new TokenStream() {
boolean finished = true;
final CharTermAttribute term = addAttribute(CharTermAttribute.class);

View File

@ -110,6 +110,7 @@ public class GeoPolygonQueryBuilderTests extends AbstractQueryTestCase<GeoPolygo
* explicitly mapped
*/
@Override
@AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/16399")
public void testToQuery() throws IOException {
assumeTrue("test runs only when at least a type is registered", getCurrentTypes().length > 0);
super.testToQuery();

View File

@ -27,6 +27,7 @@ import org.apache.lucene.search.similarities.BasicModelG;
import org.apache.lucene.search.similarities.DFRSimilarity;
import org.apache.lucene.search.similarities.DistributionSPL;
import org.apache.lucene.search.similarities.IBSimilarity;
import org.apache.lucene.search.similarities.IndependenceChiSquared;
import org.apache.lucene.search.similarities.LMDirichletSimilarity;
import org.apache.lucene.search.similarities.LMJelinekMercerSimilarity;
import org.apache.lucene.search.similarities.LambdaTTF;
@ -167,12 +168,14 @@ public class SimilarityTests extends ESSingleNodeTestCase {
Settings indexSettings = Settings.settingsBuilder()
.put("index.similarity.my_similarity.type", "DFI")
.put("index.similarity.my_similarity.independence_measure", "chisquared")
.build();
IndexService indexService = createIndex("foo", indexSettings);
DocumentMapper documentMapper = indexService.mapperService().documentMapperParser().parse("type", new CompressedXContent(mapping));
MappedFieldType fieldType = documentMapper.mappers().getMapper("field1").fieldType();
assertThat(fieldType.similarity(), instanceOf(DFISimilarityProvider.class));
assertThat(fieldType.similarity().get(), instanceOf(DFISimilarity.class));
DFISimilarity similarity = (DFISimilarity) fieldType.similarity().get();
assertThat(similarity.getIndependence(), instanceOf(IndependenceChiSquared.class));
}
public void testResolveSimilaritiesFromMapping_LMDirichlet() throws IOException {

View File

@ -1 +0,0 @@
528a695bb8882dbc3d9866335ac1bb3905cba4e3

View File

@ -0,0 +1 @@
c1a6adaf97f1f341b311ddf050d2b19c79fb1945

View File

@ -1 +0,0 @@
3fb1bcc1001a10b74ae91848c8558572891c1409

View File

@ -0,0 +1 @@
52e20edd7a5fc828cd19bb49a603d57d7d4f2cd7

View File

@ -1 +0,0 @@
9eff7f186877882f8b68f031f610bd7ab8c5c1fb

View File

@ -0,0 +1 @@
c28b1829a7510a59316761f0805072cf7441df24

View File

@ -1 +0,0 @@
6e6253936522f27b35ba4d8485806f517ef2df45

View File

@ -0,0 +1 @@
c2e5d4357f2dad4aff99b9457ea916d259cb09f4

View File

@ -1 +0,0 @@
8a313aa34b0070d3f7d48005e7677b680db1b09d

View File

@ -0,0 +1 @@
112959bececacfeaa72533ac94cca3d3d164550b

View File

@ -1 +0,0 @@
bf4c5a17cfb265d321ef4cfb0f3d7c1a6a6651de

View File

@ -0,0 +1 @@
975f42fac508bc999386955e449f5b91d123b569

View File

@ -1 +0,0 @@
2713a319d0aa696c65a32a36fda830bc482a5880

View File

@ -0,0 +1 @@
3744a71c00220ef98dfcffc8265325709224fee5

View File

@ -1 +0,0 @@
88251ecdbf877c15a94d4013aa5157f5b5ce4cea

View File

@ -0,0 +1 @@
e1fb855fc6711bc977587aecf42060d958f9f32b

View File

@ -1 +0,0 @@
bf9e522244c7c4eee6c3bcc3212ff057f7b88000

View File

@ -0,0 +1 @@
74914a9410a5f8a43e72ff77532ae481c61f6384

View File

@ -1 +0,0 @@
12d71cf10a4b79231dc488af16d723dfca5ab64b

View File

@ -0,0 +1 @@
f3a5c7242ecee80e80e5da0ff328897452cbec77

View File

@ -1 +0,0 @@
f903d67d042904527a7e2e8a75c55afe36a04251

View File

@ -0,0 +1 @@
054bd6d6e3762af6828ae29805e2c6ccd136aaf8

View File

@ -1 +0,0 @@
2f5758bbcf97048ab62d2d4ae73867d06f1ed03f

View File

@ -0,0 +1 @@
2580c4ccce1258580dbf8035e9e4ff1cf73b1cff

View File

@ -1 +0,0 @@
2cc29e4658be151658fac6e5ed7915982b6de861

View File

@ -0,0 +1 @@
56ddb993dda8b6c0d68d64b1d4be6e088df29669

View File

@ -1 +0,0 @@
f490a09ca056aba42e8751a469ef114df64aae0d

View File

@ -0,0 +1 @@
bce01a0ba74c0df5caaf2b112537024371d03df4

View File

@ -112,7 +112,11 @@ Type name: `DFR`
==== DFI similarity
Similarity that implements the http://trec.nist.gov/pubs/trec21/papers/irra.web.nb.pdf[divergence from independence]
model (normalized chi-squared distance)
model.
This similarity has the following options:
[horizontal]
`independence_measure`:: Possible values `standardized`, `saturated`, `chisquared`.
[float]
[[ib]]
@ -121,7 +125,7 @@ model (normalized chi-squared distance)
http://lucene.apache.org/core/5_2_1/core/org/apache/lucene/search/similarities/IBSimilarity.html[Information
based model]. The algorithm is based on the concept that the information content in any symbolic 'distribution'
sequence is primarily determined by the repetitive usage of its basic elements.
For written texts this challenge would correspond to comparing the writing styles of diferent authors.
For written texts this challenge would correspond to comparing the writing styles of different authors.
This similarity has the following options:
[horizontal]

View File

@ -1 +0,0 @@
31db8e49e4089772eae8ab2db0ac59bab6fbcd2f

View File

@ -0,0 +1 @@
477099ede788272484648ecd05d39d8745c74d6e

View File

@ -1 +0,0 @@
4504d3d993f094ed70585124df56c2be86c2615a

View File

@ -0,0 +1 @@
dc33b8449a6423132bf618bb1d32f464d191686d

View File

@ -1 +0,0 @@
15555d41d27bb398b6736be85a5eca4ca224b85d

View File

@ -0,0 +1 @@
d71ffab4f99835d863cd4b7b280469e62a98db61

View File

@ -1 +0,0 @@
9d43a338338a6c88e8071a0e3eeb51f4d9d0364a

View File

@ -0,0 +1 @@
30a9da299d3e4190833aebd07e814ce8fb9e9f78

View File

@ -1 +0,0 @@
b66c95032c5ca41ce7b85519c64aab4e9a233f78

View File

@ -0,0 +1 @@
a5f2374bc9180d842e823b681726ae2663ab1ebd

View File

@ -1 +0,0 @@
4f41bacd77ce372f10f2c57ab516b2ce9aa71173

View File

@ -0,0 +1 @@
7d0ae501ad604447e02206f86e6592bcafd6a3f1