Upgrade to lucene 5.5.0-snapshot-4de5f1d
This commit is contained in:
parent
99cf7de514
commit
d5dc05f69e
|
@ -289,7 +289,7 @@ class BuildPlugin implements Plugin<Project> {
|
|||
String luceneVersion = VersionProperties.lucene
|
||||
if (luceneVersion.contains('-snapshot')) {
|
||||
// extract the revision number from the version with a regex matcher
|
||||
String revision = (luceneVersion =~ /\w+-snapshot-(\d+)/)[0][1]
|
||||
String revision = (luceneVersion =~ /\w+-snapshot-([a-z0-9]+)/)[0][1]
|
||||
repos.maven {
|
||||
name 'lucene-snapshots'
|
||||
url "http://s3.amazonaws.com/download.elasticsearch.org/lucenesnapshots/${revision}"
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
elasticsearch = 3.0.0-SNAPSHOT
|
||||
lucene = 5.5.0-snapshot-1725675
|
||||
lucene = 5.5.0-snapshot-4de5f1d
|
||||
|
||||
# optional dependencies
|
||||
spatial4j = 0.5
|
||||
|
|
|
@ -63,7 +63,7 @@ public class AllField extends Field {
|
|||
}
|
||||
|
||||
@Override
|
||||
public TokenStream tokenStream(Analyzer analyzer, TokenStream previous) throws IOException {
|
||||
public TokenStream tokenStream(Analyzer analyzer, TokenStream previous) {
|
||||
try {
|
||||
allEntries.reset(); // reset the all entries, just in case it was read already
|
||||
if (allEntries.customBoost() && fieldType().indexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0) {
|
||||
|
|
|
@ -320,7 +320,7 @@ public class DoubleFieldMapper extends NumberFieldMapper {
|
|||
}
|
||||
|
||||
@Override
|
||||
public TokenStream tokenStream(Analyzer analyzer, TokenStream previous) throws IOException {
|
||||
public TokenStream tokenStream(Analyzer analyzer, TokenStream previous) {
|
||||
if (fieldType().indexOptions() != IndexOptions.NONE) {
|
||||
return getCachedStream().setDoubleValue(number);
|
||||
}
|
||||
|
|
|
@ -332,7 +332,7 @@ public class FloatFieldMapper extends NumberFieldMapper {
|
|||
}
|
||||
|
||||
@Override
|
||||
public TokenStream tokenStream(Analyzer analyzer, TokenStream previous) throws IOException {
|
||||
public TokenStream tokenStream(Analyzer analyzer, TokenStream previous) {
|
||||
if (fieldType().indexOptions() != IndexOptions.NONE) {
|
||||
return getCachedStream().setFloatValue(number);
|
||||
}
|
||||
|
|
|
@ -340,7 +340,7 @@ public class IntegerFieldMapper extends NumberFieldMapper {
|
|||
}
|
||||
|
||||
@Override
|
||||
public TokenStream tokenStream(Analyzer analyzer, TokenStream previous) throws IOException {
|
||||
public TokenStream tokenStream(Analyzer analyzer, TokenStream previous) {
|
||||
if (fieldType().indexOptions() != IndexOptions.NONE) {
|
||||
return getCachedStream().setIntValue(number);
|
||||
}
|
||||
|
|
|
@ -323,7 +323,7 @@ public class LongFieldMapper extends NumberFieldMapper {
|
|||
}
|
||||
|
||||
@Override
|
||||
public TokenStream tokenStream(Analyzer analyzer, TokenStream previous) throws IOException {
|
||||
public TokenStream tokenStream(Analyzer analyzer, TokenStream previous) {
|
||||
if (fieldType().indexOptions() != IndexOptions.NONE) {
|
||||
return getCachedStream().setLongValue(number);
|
||||
}
|
||||
|
|
|
@ -408,7 +408,7 @@ public abstract class NumberFieldMapper extends FieldMapper implements AllFieldM
|
|||
}
|
||||
|
||||
@Override
|
||||
public TokenStream tokenStream(Analyzer analyzer, TokenStream reuse) throws IOException {
|
||||
public TokenStream tokenStream(Analyzer analyzer, TokenStream reuse) {
|
||||
return null;
|
||||
}
|
||||
|
||||
|
|
|
@ -332,7 +332,7 @@ public class ShortFieldMapper extends NumberFieldMapper {
|
|||
}
|
||||
|
||||
@Override
|
||||
public TokenStream tokenStream(Analyzer analyzer, TokenStream previous) throws IOException {
|
||||
public TokenStream tokenStream(Analyzer analyzer, TokenStream previous) {
|
||||
if (fieldType().indexOptions() != IndexOptions.NONE) {
|
||||
return getCachedStream().setIntValue(number);
|
||||
}
|
||||
|
|
|
@ -20,30 +20,58 @@
|
|||
package org.elasticsearch.index.similarity;
|
||||
|
||||
import org.apache.lucene.search.similarities.DFISimilarity;
|
||||
import org.apache.lucene.search.similarities.Independence;
|
||||
import org.apache.lucene.search.similarities.IndependenceChiSquared;
|
||||
import org.apache.lucene.search.similarities.IndependenceSaturated;
|
||||
import org.apache.lucene.search.similarities.IndependenceStandardized;
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import static java.util.Collections.unmodifiableMap;
|
||||
|
||||
/**
|
||||
* {@link SimilarityProvider} for the {@link DFISimilarity}.
|
||||
* <p>
|
||||
* Configuration options available:
|
||||
* <ul>
|
||||
* <li>independence_measure</li>
|
||||
* <li>discount_overlaps</li>
|
||||
* </ul>
|
||||
* @see DFISimilarity For more information about configuration
|
||||
*/
|
||||
public class DFISimilarityProvider extends AbstractSimilarityProvider {
|
||||
// the "basic models" of divergence from independence
|
||||
private static final Map<String, Independence> INDEPENDENCE_MEASURES;
|
||||
static {
|
||||
Map<String, Independence> measures = new HashMap<>();
|
||||
measures.put("standardized", new IndependenceStandardized());
|
||||
measures.put("saturated", new IndependenceSaturated());
|
||||
measures.put("chisquared", new IndependenceChiSquared());
|
||||
INDEPENDENCE_MEASURES = unmodifiableMap(measures);
|
||||
}
|
||||
|
||||
private final DFISimilarity similarity;
|
||||
|
||||
public DFISimilarityProvider(String name, Settings settings) {
|
||||
super(name);
|
||||
boolean discountOverlaps = settings.getAsBoolean("discount_overlaps", true);
|
||||
|
||||
this.similarity = new DFISimilarity();
|
||||
Independence measure = parseIndependence(settings);
|
||||
this.similarity = new DFISimilarity(measure);
|
||||
this.similarity.setDiscountOverlaps(discountOverlaps);
|
||||
}
|
||||
|
||||
private Independence parseIndependence(Settings settings) {
|
||||
String name = settings.get("independence_measure");
|
||||
Independence measure = INDEPENDENCE_MEASURES.get(name);
|
||||
if (measure == null) {
|
||||
throw new IllegalArgumentException("Unsupported IndependenceMeasure [" + name + "]");
|
||||
}
|
||||
return measure;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Similarity get() {
|
||||
return similarity;
|
||||
|
|
|
@ -52,8 +52,8 @@ import static java.util.Collections.unmodifiableMap;
|
|||
* @see DFRSimilarity For more information about configuration
|
||||
*/
|
||||
public class DFRSimilarityProvider extends AbstractSimilarityProvider {
|
||||
private static final Map<String, BasicModel> MODEL_CACHE;
|
||||
private static final Map<String, AfterEffect> EFFECT_CACHE;
|
||||
private static final Map<String, BasicModel> BASIC_MODELS;
|
||||
private static final Map<String, AfterEffect> AFTER_EFFECTS;
|
||||
|
||||
static {
|
||||
Map<String, BasicModel> models = new HashMap<>();
|
||||
|
@ -64,13 +64,13 @@ public class DFRSimilarityProvider extends AbstractSimilarityProvider {
|
|||
models.put("in", new BasicModelIn());
|
||||
models.put("ine", new BasicModelIne());
|
||||
models.put("p", new BasicModelP());
|
||||
MODEL_CACHE = unmodifiableMap(models);
|
||||
BASIC_MODELS = unmodifiableMap(models);
|
||||
|
||||
Map<String, AfterEffect> effects = new HashMap<>();
|
||||
effects.put("no", new AfterEffect.NoAfterEffect());
|
||||
effects.put("b", new AfterEffectB());
|
||||
effects.put("l", new AfterEffectL());
|
||||
EFFECT_CACHE = unmodifiableMap(effects);
|
||||
AFTER_EFFECTS = unmodifiableMap(effects);
|
||||
}
|
||||
|
||||
private final DFRSimilarity similarity;
|
||||
|
@ -91,7 +91,7 @@ public class DFRSimilarityProvider extends AbstractSimilarityProvider {
|
|||
*/
|
||||
protected BasicModel parseBasicModel(Settings settings) {
|
||||
String basicModel = settings.get("basic_model");
|
||||
BasicModel model = MODEL_CACHE.get(basicModel);
|
||||
BasicModel model = BASIC_MODELS.get(basicModel);
|
||||
if (model == null) {
|
||||
throw new IllegalArgumentException("Unsupported BasicModel [" + basicModel + "]");
|
||||
}
|
||||
|
@ -106,7 +106,7 @@ public class DFRSimilarityProvider extends AbstractSimilarityProvider {
|
|||
*/
|
||||
protected AfterEffect parseAfterEffect(Settings settings) {
|
||||
String afterEffect = settings.get("after_effect");
|
||||
AfterEffect effect = EFFECT_CACHE.get(afterEffect);
|
||||
AfterEffect effect = AFTER_EFFECTS.get(afterEffect);
|
||||
if (effect == null) {
|
||||
throw new IllegalArgumentException("Unsupported AfterEffect [" + afterEffect + "]");
|
||||
}
|
||||
|
|
|
@ -31,8 +31,8 @@ grant codeBase "${codebase.securesm-1.0.jar}" {
|
|||
//// Very special jar permissions:
|
||||
//// These are dangerous permissions that we don't want to grant to everything.
|
||||
|
||||
grant codeBase "${codebase.lucene-core-5.5.0-snapshot-1725675.jar}" {
|
||||
// needed to allow MMapDirectory's "unmap hack"
|
||||
grant codeBase "${codebase.lucene-core-5.5.0-snapshot-4de5f1d.jar}" {
|
||||
// needed to allow MMapDirectory's "unmap hack" (die unmap hack, die)
|
||||
permission java.lang.RuntimePermission "accessClassInPackage.sun.misc";
|
||||
permission java.lang.reflect.ReflectPermission "suppressAccessChecks";
|
||||
// NOTE: also needed for RAMUsageEstimator size calculations
|
||||
|
|
|
@ -31,7 +31,7 @@ grant codeBase "${codebase.securemock-1.2.jar}" {
|
|||
permission java.lang.reflect.ReflectPermission "suppressAccessChecks";
|
||||
};
|
||||
|
||||
grant codeBase "${codebase.lucene-test-framework-5.5.0-snapshot-1725675.jar}" {
|
||||
grant codeBase "${codebase.lucene-test-framework-5.5.0-snapshot-4de5f1d.jar}" {
|
||||
// needed by RamUsageTester
|
||||
permission java.lang.reflect.ReflectPermission "suppressAccessChecks";
|
||||
};
|
||||
|
|
|
@ -209,7 +209,7 @@ public class VersionsTests extends ESTestCase {
|
|||
this.version = version;
|
||||
}
|
||||
@Override
|
||||
public TokenStream tokenStream(Analyzer analyzer, TokenStream reuse) throws IOException {
|
||||
public TokenStream tokenStream(Analyzer analyzer, TokenStream reuse) {
|
||||
return new TokenStream() {
|
||||
boolean finished = true;
|
||||
final CharTermAttribute term = addAttribute(CharTermAttribute.class);
|
||||
|
|
|
@ -110,6 +110,7 @@ public class GeoPolygonQueryBuilderTests extends AbstractQueryTestCase<GeoPolygo
|
|||
* explicitly mapped
|
||||
*/
|
||||
@Override
|
||||
@AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/16399")
|
||||
public void testToQuery() throws IOException {
|
||||
assumeTrue("test runs only when at least a type is registered", getCurrentTypes().length > 0);
|
||||
super.testToQuery();
|
||||
|
|
|
@ -27,6 +27,7 @@ import org.apache.lucene.search.similarities.BasicModelG;
|
|||
import org.apache.lucene.search.similarities.DFRSimilarity;
|
||||
import org.apache.lucene.search.similarities.DistributionSPL;
|
||||
import org.apache.lucene.search.similarities.IBSimilarity;
|
||||
import org.apache.lucene.search.similarities.IndependenceChiSquared;
|
||||
import org.apache.lucene.search.similarities.LMDirichletSimilarity;
|
||||
import org.apache.lucene.search.similarities.LMJelinekMercerSimilarity;
|
||||
import org.apache.lucene.search.similarities.LambdaTTF;
|
||||
|
@ -167,12 +168,14 @@ public class SimilarityTests extends ESSingleNodeTestCase {
|
|||
|
||||
Settings indexSettings = Settings.settingsBuilder()
|
||||
.put("index.similarity.my_similarity.type", "DFI")
|
||||
.put("index.similarity.my_similarity.independence_measure", "chisquared")
|
||||
.build();
|
||||
IndexService indexService = createIndex("foo", indexSettings);
|
||||
DocumentMapper documentMapper = indexService.mapperService().documentMapperParser().parse("type", new CompressedXContent(mapping));
|
||||
MappedFieldType fieldType = documentMapper.mappers().getMapper("field1").fieldType();
|
||||
assertThat(fieldType.similarity(), instanceOf(DFISimilarityProvider.class));
|
||||
assertThat(fieldType.similarity().get(), instanceOf(DFISimilarity.class));
|
||||
DFISimilarity similarity = (DFISimilarity) fieldType.similarity().get();
|
||||
assertThat(similarity.getIndependence(), instanceOf(IndependenceChiSquared.class));
|
||||
}
|
||||
|
||||
public void testResolveSimilaritiesFromMapping_LMDirichlet() throws IOException {
|
||||
|
|
|
@ -1 +0,0 @@
|
|||
528a695bb8882dbc3d9866335ac1bb3905cba4e3
|
|
@ -0,0 +1 @@
|
|||
c1a6adaf97f1f341b311ddf050d2b19c79fb1945
|
|
@ -1 +0,0 @@
|
|||
3fb1bcc1001a10b74ae91848c8558572891c1409
|
|
@ -0,0 +1 @@
|
|||
52e20edd7a5fc828cd19bb49a603d57d7d4f2cd7
|
|
@ -1 +0,0 @@
|
|||
9eff7f186877882f8b68f031f610bd7ab8c5c1fb
|
|
@ -0,0 +1 @@
|
|||
c28b1829a7510a59316761f0805072cf7441df24
|
|
@ -1 +0,0 @@
|
|||
6e6253936522f27b35ba4d8485806f517ef2df45
|
|
@ -0,0 +1 @@
|
|||
c2e5d4357f2dad4aff99b9457ea916d259cb09f4
|
|
@ -1 +0,0 @@
|
|||
8a313aa34b0070d3f7d48005e7677b680db1b09d
|
|
@ -0,0 +1 @@
|
|||
112959bececacfeaa72533ac94cca3d3d164550b
|
|
@ -1 +0,0 @@
|
|||
bf4c5a17cfb265d321ef4cfb0f3d7c1a6a6651de
|
|
@ -0,0 +1 @@
|
|||
975f42fac508bc999386955e449f5b91d123b569
|
|
@ -1 +0,0 @@
|
|||
2713a319d0aa696c65a32a36fda830bc482a5880
|
|
@ -0,0 +1 @@
|
|||
3744a71c00220ef98dfcffc8265325709224fee5
|
|
@ -1 +0,0 @@
|
|||
88251ecdbf877c15a94d4013aa5157f5b5ce4cea
|
|
@ -0,0 +1 @@
|
|||
e1fb855fc6711bc977587aecf42060d958f9f32b
|
|
@ -1 +0,0 @@
|
|||
bf9e522244c7c4eee6c3bcc3212ff057f7b88000
|
|
@ -0,0 +1 @@
|
|||
74914a9410a5f8a43e72ff77532ae481c61f6384
|
|
@ -1 +0,0 @@
|
|||
12d71cf10a4b79231dc488af16d723dfca5ab64b
|
|
@ -0,0 +1 @@
|
|||
f3a5c7242ecee80e80e5da0ff328897452cbec77
|
|
@ -1 +0,0 @@
|
|||
f903d67d042904527a7e2e8a75c55afe36a04251
|
|
@ -0,0 +1 @@
|
|||
054bd6d6e3762af6828ae29805e2c6ccd136aaf8
|
|
@ -1 +0,0 @@
|
|||
2f5758bbcf97048ab62d2d4ae73867d06f1ed03f
|
|
@ -0,0 +1 @@
|
|||
2580c4ccce1258580dbf8035e9e4ff1cf73b1cff
|
|
@ -1 +0,0 @@
|
|||
2cc29e4658be151658fac6e5ed7915982b6de861
|
|
@ -0,0 +1 @@
|
|||
56ddb993dda8b6c0d68d64b1d4be6e088df29669
|
|
@ -1 +0,0 @@
|
|||
f490a09ca056aba42e8751a469ef114df64aae0d
|
|
@ -0,0 +1 @@
|
|||
bce01a0ba74c0df5caaf2b112537024371d03df4
|
|
@ -112,7 +112,11 @@ Type name: `DFR`
|
|||
==== DFI similarity
|
||||
|
||||
Similarity that implements the http://trec.nist.gov/pubs/trec21/papers/irra.web.nb.pdf[divergence from independence]
|
||||
model (normalized chi-squared distance)
|
||||
model.
|
||||
This similarity has the following options:
|
||||
|
||||
[horizontal]
|
||||
`independence_measure`:: Possible values `standardized`, `saturated`, `chisquared`.
|
||||
|
||||
[float]
|
||||
[[ib]]
|
||||
|
@ -121,7 +125,7 @@ model (normalized chi-squared distance)
|
|||
http://lucene.apache.org/core/5_2_1/core/org/apache/lucene/search/similarities/IBSimilarity.html[Information
|
||||
based model] . The algorithm is based on the concept that the information content in any symbolic 'distribution'
|
||||
sequence is primarily determined by the repetitive usage of its basic elements.
|
||||
For written texts this challenge would correspond to comparing the writing styles of diferent authors.
|
||||
For written texts this challenge would correspond to comparing the writing styles of different authors.
|
||||
This similarity has the following options:
|
||||
|
||||
[horizontal]
|
||||
|
|
|
@ -1 +0,0 @@
|
|||
31db8e49e4089772eae8ab2db0ac59bab6fbcd2f
|
|
@ -0,0 +1 @@
|
|||
477099ede788272484648ecd05d39d8745c74d6e
|
|
@ -1 +0,0 @@
|
|||
4504d3d993f094ed70585124df56c2be86c2615a
|
|
@ -0,0 +1 @@
|
|||
dc33b8449a6423132bf618bb1d32f464d191686d
|
|
@ -1 +0,0 @@
|
|||
15555d41d27bb398b6736be85a5eca4ca224b85d
|
|
@ -0,0 +1 @@
|
|||
d71ffab4f99835d863cd4b7b280469e62a98db61
|
|
@ -1 +0,0 @@
|
|||
9d43a338338a6c88e8071a0e3eeb51f4d9d0364a
|
|
@ -0,0 +1 @@
|
|||
30a9da299d3e4190833aebd07e814ce8fb9e9f78
|
|
@ -1 +0,0 @@
|
|||
b66c95032c5ca41ce7b85519c64aab4e9a233f78
|
|
@ -0,0 +1 @@
|
|||
a5f2374bc9180d842e823b681726ae2663ab1ebd
|
|
@ -1 +0,0 @@
|
|||
4f41bacd77ce372f10f2c57ab516b2ce9aa71173
|
|
@ -0,0 +1 @@
|
|||
7d0ae501ad604447e02206f86e6592bcafd6a3f1
|
Loading…
Reference in New Issue