mirror of https://github.com/apache/lucene.git
Remove unnecessary backward compatibility.
This commit is contained in:
parent
9ca3dd26e4
commit
bc9c4144df
|
@ -744,13 +744,13 @@ public final class CheckIndex implements Closeable {
|
|||
segInfoStat.fieldNormStatus = testFieldNorms(reader, infoStream, failFast);
|
||||
|
||||
// Test the Term Index
|
||||
segInfoStat.termIndexStatus = testPostings(reader, infoStream, verbose, failFast, version);
|
||||
segInfoStat.termIndexStatus = testPostings(reader, infoStream, verbose, failFast);
|
||||
|
||||
// Test Stored Fields
|
||||
segInfoStat.storedFieldStatus = testStoredFields(reader, infoStream, failFast);
|
||||
|
||||
// Test Term Vectors
|
||||
segInfoStat.termVectorStatus = testTermVectors(reader, infoStream, verbose, crossCheckTermVectors, failFast, version);
|
||||
segInfoStat.termVectorStatus = testTermVectors(reader, infoStream, verbose, crossCheckTermVectors, failFast);
|
||||
|
||||
// Test Docvalues
|
||||
segInfoStat.docValuesStatus = testDocValues(reader, infoStream, failFast);
|
||||
|
@ -1209,7 +1209,7 @@ public final class CheckIndex implements Closeable {
|
|||
* checks Fields api is consistent with itself.
|
||||
* searcher is optional, to verify with queries. Can be null.
|
||||
*/
|
||||
private static Status.TermIndexStatus checkFields(Fields fields, Bits liveDocs, int maxDoc, FieldInfos fieldInfos, boolean doPrint, boolean isVectors, PrintStream infoStream, boolean verbose, Version version) throws IOException {
|
||||
private static Status.TermIndexStatus checkFields(Fields fields, Bits liveDocs, int maxDoc, FieldInfos fieldInfos, boolean doPrint, boolean isVectors, PrintStream infoStream, boolean verbose) throws IOException {
|
||||
// TODO: we should probably return our own stats thing...?!
|
||||
long startNS;
|
||||
if (doPrint) {
|
||||
|
@ -1465,20 +1465,17 @@ public final class CheckIndex implements Closeable {
|
|||
if (hasOffsets) {
|
||||
int startOffset = postings.startOffset();
|
||||
int endOffset = postings.endOffset();
|
||||
// In Lucene 7 we fixed IndexWriter to also enforce term vector offsets
|
||||
if (isVectors == false || version.onOrAfter(Version.LUCENE_7_0_0)) {
|
||||
if (startOffset < 0) {
|
||||
throw new RuntimeException("term " + term + ": doc " + doc + ": pos " + pos + ": startOffset " + startOffset + " is out of bounds");
|
||||
}
|
||||
if (startOffset < lastOffset) {
|
||||
throw new RuntimeException("term " + term + ": doc " + doc + ": pos " + pos + ": startOffset " + startOffset + " < lastStartOffset " + lastOffset + "; consider using the FixBrokenOffsets tool in Lucene's backward-codecs module to correct your index");
|
||||
}
|
||||
if (endOffset < 0) {
|
||||
throw new RuntimeException("term " + term + ": doc " + doc + ": pos " + pos + ": endOffset " + endOffset + " is out of bounds");
|
||||
}
|
||||
if (endOffset < startOffset) {
|
||||
throw new RuntimeException("term " + term + ": doc " + doc + ": pos " + pos + ": endOffset " + endOffset + " < startOffset " + startOffset);
|
||||
}
|
||||
if (startOffset < 0) {
|
||||
throw new RuntimeException("term " + term + ": doc " + doc + ": pos " + pos + ": startOffset " + startOffset + " is out of bounds");
|
||||
}
|
||||
if (startOffset < lastOffset) {
|
||||
throw new RuntimeException("term " + term + ": doc " + doc + ": pos " + pos + ": startOffset " + startOffset + " < lastStartOffset " + lastOffset + "; consider using the FixBrokenOffsets tool in Lucene's backward-codecs module to correct your index");
|
||||
}
|
||||
if (endOffset < 0) {
|
||||
throw new RuntimeException("term " + term + ": doc " + doc + ": pos " + pos + ": endOffset " + endOffset + " is out of bounds");
|
||||
}
|
||||
if (endOffset < startOffset) {
|
||||
throw new RuntimeException("term " + term + ": doc " + doc + ": pos " + pos + ": endOffset " + endOffset + " < startOffset " + startOffset);
|
||||
}
|
||||
lastOffset = startOffset;
|
||||
}
|
||||
|
@ -1745,15 +1742,15 @@ public final class CheckIndex implements Closeable {
|
|||
* Test the term index.
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public static Status.TermIndexStatus testPostings(CodecReader reader, PrintStream infoStream, Version version) throws IOException {
|
||||
return testPostings(reader, infoStream, false, false, version);
|
||||
public static Status.TermIndexStatus testPostings(CodecReader reader, PrintStream infoStream) throws IOException {
|
||||
return testPostings(reader, infoStream, false, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test the term index.
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public static Status.TermIndexStatus testPostings(CodecReader reader, PrintStream infoStream, boolean verbose, boolean failFast, Version version) throws IOException {
|
||||
public static Status.TermIndexStatus testPostings(CodecReader reader, PrintStream infoStream, boolean verbose, boolean failFast) throws IOException {
|
||||
|
||||
// TODO: we should go and verify term vectors match, if
|
||||
// crossCheckTermVectors is on...
|
||||
|
@ -1768,7 +1765,7 @@ public final class CheckIndex implements Closeable {
|
|||
|
||||
final Fields fields = reader.getPostingsReader().getMergeInstance();
|
||||
final FieldInfos fieldInfos = reader.getFieldInfos();
|
||||
status = checkFields(fields, reader.getLiveDocs(), maxDoc, fieldInfos, true, false, infoStream, verbose, version);
|
||||
status = checkFields(fields, reader.getLiveDocs(), maxDoc, fieldInfos, true, false, infoStream, verbose);
|
||||
} catch (Throwable e) {
|
||||
if (failFast) {
|
||||
throw IOUtils.rethrowAlways(e);
|
||||
|
@ -2377,15 +2374,15 @@ public final class CheckIndex implements Closeable {
|
|||
* Test term vectors.
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public static Status.TermVectorStatus testTermVectors(CodecReader reader, PrintStream infoStream, Version version) throws IOException {
|
||||
return testTermVectors(reader, infoStream, false, false, false, version);
|
||||
public static Status.TermVectorStatus testTermVectors(CodecReader reader, PrintStream infoStream) throws IOException {
|
||||
return testTermVectors(reader, infoStream, false, false, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test term vectors.
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public static Status.TermVectorStatus testTermVectors(CodecReader reader, PrintStream infoStream, boolean verbose, boolean crossCheckTermVectors, boolean failFast, Version version) throws IOException {
|
||||
public static Status.TermVectorStatus testTermVectors(CodecReader reader, PrintStream infoStream, boolean verbose, boolean crossCheckTermVectors, boolean failFast) throws IOException {
|
||||
long startNS = System.nanoTime();
|
||||
final Status.TermVectorStatus status = new Status.TermVectorStatus();
|
||||
final FieldInfos fieldInfos = reader.getFieldInfos();
|
||||
|
@ -2425,7 +2422,7 @@ public final class CheckIndex implements Closeable {
|
|||
|
||||
if (tfv != null) {
|
||||
// First run with no deletions:
|
||||
checkFields(tfv, null, 1, fieldInfos, false, true, infoStream, verbose, version);
|
||||
checkFields(tfv, null, 1, fieldInfos, false, true, infoStream, verbose);
|
||||
|
||||
// Only agg stats if the doc is live:
|
||||
final boolean doStats = liveDocs == null || liveDocs.get(j);
|
||||
|
|
|
@ -1170,9 +1170,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
|
|||
}
|
||||
}
|
||||
|
||||
/** Confirms that the incoming index sort (if any) matches the existing index sort (if any).
|
||||
* This is unfortunately just best effort, because it could be the old index only has unsorted flushed segments built
|
||||
* before {@link Version#LUCENE_6_5_0} (flushed segments are sorted in Lucene 7.0). */
|
||||
/** Confirms that the incoming index sort (if any) matches the existing index sort (if any). */
|
||||
private void validateIndexSort() throws CorruptIndexException {
|
||||
Sort indexSort = config.getIndexSort();
|
||||
if (indexSort != null) {
|
||||
|
@ -1180,7 +1178,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
|
|||
Sort segmentIndexSort = info.info.getIndexSort();
|
||||
if (segmentIndexSort != null && indexSort.equals(segmentIndexSort) == false) {
|
||||
throw new IllegalArgumentException("cannot change previous indexSort=" + segmentIndexSort + " (from segment=" + info + ") to new indexSort=" + indexSort);
|
||||
} else if (segmentIndexSort == null && info.info.getVersion().onOrAfter(Version.LUCENE_6_5_0)) {
|
||||
} else if (segmentIndexSort == null) {
|
||||
// Flushed segments are not sorted if they were built with a version prior to 6.5.0
|
||||
throw new CorruptIndexException("segment not sorted with indexSort=" + segmentIndexSort, info.info.toString());
|
||||
}
|
||||
|
|
|
@ -312,7 +312,7 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentCommitInfo
|
|||
CodecUtil.checkIndexHeaderSuffix(input, Long.toString(generation, Character.MAX_RADIX));
|
||||
|
||||
Version luceneVersion = Version.fromBits(input.readVInt(), input.readVInt(), input.readVInt());
|
||||
if (luceneVersion.onOrAfter(Version.LUCENE_6_0_0) == false) {
|
||||
if (luceneVersion.onOrAfter(Version.LUCENE_7_0_0) == false) {
|
||||
// TODO: should we check indexCreatedVersion instead?
|
||||
throw new IndexFormatTooOldException(input, "this index is too old (version: " + luceneVersion + ")");
|
||||
}
|
||||
|
|
|
@ -118,16 +118,9 @@ public class BM25Similarity extends Similarity {
|
|||
}
|
||||
|
||||
/** Cache of decoded bytes. */
|
||||
private static final float[] OLD_LENGTH_TABLE = new float[256];
|
||||
private static final float[] LENGTH_TABLE = new float[256];
|
||||
|
||||
static {
|
||||
for (int i = 1; i < 256; i++) {
|
||||
float f = SmallFloat.byte315ToFloat((byte)i);
|
||||
OLD_LENGTH_TABLE[i] = 1.0f / (f*f);
|
||||
}
|
||||
OLD_LENGTH_TABLE[0] = 1.0f / OLD_LENGTH_TABLE[255]; // otherwise inf
|
||||
|
||||
for (int i = 0; i < 256; i++) {
|
||||
LENGTH_TABLE[i] = SmallFloat.byte4ToInt((byte) i);
|
||||
}
|
||||
|
@ -137,12 +130,7 @@ public class BM25Similarity extends Similarity {
|
|||
@Override
|
||||
public final long computeNorm(FieldInvertState state) {
|
||||
final int numTerms = discountOverlaps ? state.getLength() - state.getNumOverlap() : state.getLength();
|
||||
int indexCreatedVersionMajor = state.getIndexCreatedVersionMajor();
|
||||
if (indexCreatedVersionMajor >= 7) {
|
||||
return SmallFloat.intToByte4(numTerms);
|
||||
} else {
|
||||
return SmallFloat.floatToByte315((float) (1 / Math.sqrt(numTerms)));
|
||||
}
|
||||
return SmallFloat.intToByte4(numTerms);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -205,19 +193,17 @@ public class BM25Similarity extends Similarity {
|
|||
Explanation idf = termStats.length == 1 ? idfExplain(collectionStats, termStats[0]) : idfExplain(collectionStats, termStats);
|
||||
float avgdl = avgFieldLength(collectionStats);
|
||||
|
||||
float[] oldCache = new float[256];
|
||||
float[] cache = new float[256];
|
||||
for (int i = 0; i < cache.length; i++) {
|
||||
oldCache[i] = k1 * ((1 - b) + b * OLD_LENGTH_TABLE[i] / avgdl);
|
||||
cache[i] = k1 * ((1 - b) + b * LENGTH_TABLE[i] / avgdl);
|
||||
}
|
||||
return new BM25Stats(collectionStats.field(), boost, idf, avgdl, oldCache, cache);
|
||||
return new BM25Stats(collectionStats.field(), boost, idf, avgdl, cache);
|
||||
}
|
||||
|
||||
@Override
|
||||
public final SimScorer simScorer(SimWeight stats, LeafReaderContext context) throws IOException {
|
||||
BM25Stats bm25stats = (BM25Stats) stats;
|
||||
return new BM25DocScorer(bm25stats, context.reader().getMetaData().getCreatedVersionMajor(), context.reader().getNormValues(bm25stats.field));
|
||||
return new BM25DocScorer(bm25stats, context.reader().getNormValues(bm25stats.field));
|
||||
}
|
||||
|
||||
private class BM25DocScorer extends SimScorer {
|
||||
|
@ -229,17 +215,12 @@ public class BM25Similarity extends Similarity {
|
|||
/** precomputed norm[256] with k1 * ((1 - b) + b * dl / avgdl) */
|
||||
private final float[] cache;
|
||||
|
||||
BM25DocScorer(BM25Stats stats, int indexCreatedVersionMajor, NumericDocValues norms) throws IOException {
|
||||
BM25DocScorer(BM25Stats stats, NumericDocValues norms) throws IOException {
|
||||
this.stats = stats;
|
||||
this.weightValue = stats.weight * (k1 + 1);
|
||||
this.norms = norms;
|
||||
if (indexCreatedVersionMajor >= 7) {
|
||||
lengthCache = LENGTH_TABLE;
|
||||
cache = stats.cache;
|
||||
} else {
|
||||
lengthCache = OLD_LENGTH_TABLE;
|
||||
cache = stats.oldCache;
|
||||
}
|
||||
lengthCache = LENGTH_TABLE;
|
||||
cache = stats.cache;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -287,16 +268,15 @@ public class BM25Similarity extends Similarity {
|
|||
/** field name, for pulling norms */
|
||||
private final String field;
|
||||
/** precomputed norm[256] with k1 * ((1 - b) + b * dl / avgdl)
|
||||
* for both OLD_LENGTH_TABLE and LENGTH_TABLE */
|
||||
private final float[] oldCache, cache;
|
||||
* for LENGTH_TABLE */
|
||||
private final float[] cache;
|
||||
|
||||
BM25Stats(String field, float boost, Explanation idf, float avgdl, float[] oldCache, float[] cache) {
|
||||
BM25Stats(String field, float boost, Explanation idf, float avgdl, float[] cache) {
|
||||
this.field = field;
|
||||
this.boost = boost;
|
||||
this.idf = idf;
|
||||
this.avgdl = avgdl;
|
||||
this.weight = idf.getValue() * boost;
|
||||
this.oldCache = oldCache;
|
||||
this.cache = cache;
|
||||
}
|
||||
|
||||
|
|
|
@ -191,7 +191,6 @@ public abstract class SimilarityBase extends Similarity {
|
|||
|
||||
@Override
|
||||
public final SimScorer simScorer(SimWeight stats, LeafReaderContext context) throws IOException {
|
||||
int indexCreatedVersionMajor = context.reader().getMetaData().getCreatedVersionMajor();
|
||||
if (stats instanceof MultiSimilarity.MultiStats) {
|
||||
// a multi term query (e.g. phrase). return the summation,
|
||||
// scoring almost as if it were boolean query
|
||||
|
@ -199,12 +198,12 @@ public abstract class SimilarityBase extends Similarity {
|
|||
SimScorer subScorers[] = new SimScorer[subStats.length];
|
||||
for (int i = 0; i < subScorers.length; i++) {
|
||||
BasicStats basicstats = (BasicStats) subStats[i];
|
||||
subScorers[i] = new BasicSimScorer(basicstats, indexCreatedVersionMajor, context.reader().getNormValues(basicstats.field));
|
||||
subScorers[i] = new BasicSimScorer(basicstats, context.reader().getNormValues(basicstats.field));
|
||||
}
|
||||
return new MultiSimilarity.MultiSimScorer(subScorers);
|
||||
} else {
|
||||
BasicStats basicstats = (BasicStats) stats;
|
||||
return new BasicSimScorer(basicstats, indexCreatedVersionMajor, context.reader().getNormValues(basicstats.field));
|
||||
return new BasicSimScorer(basicstats, context.reader().getNormValues(basicstats.field));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -218,16 +217,9 @@ public abstract class SimilarityBase extends Similarity {
|
|||
// ------------------------------ Norm handling ------------------------------
|
||||
|
||||
/** Cache of decoded bytes. */
|
||||
private static final float[] OLD_LENGTH_TABLE = new float[256];
|
||||
private static final float[] LENGTH_TABLE = new float[256];
|
||||
|
||||
static {
|
||||
for (int i = 1; i < 256; i++) {
|
||||
float f = SmallFloat.byte315ToFloat((byte)i);
|
||||
OLD_LENGTH_TABLE[i] = 1.0f / (f*f);
|
||||
}
|
||||
OLD_LENGTH_TABLE[0] = 1.0f / OLD_LENGTH_TABLE[255]; // otherwise inf
|
||||
|
||||
for (int i = 0; i < 256; i++) {
|
||||
LENGTH_TABLE[i] = SmallFloat.byte4ToInt((byte) i);
|
||||
}
|
||||
|
@ -241,12 +233,7 @@ public abstract class SimilarityBase extends Similarity {
|
|||
numTerms = state.getLength() - state.getNumOverlap();
|
||||
else
|
||||
numTerms = state.getLength();
|
||||
int indexCreatedVersionMajor = state.getIndexCreatedVersionMajor();
|
||||
if (indexCreatedVersionMajor >= 7) {
|
||||
return SmallFloat.intToByte4(numTerms);
|
||||
} else {
|
||||
return SmallFloat.floatToByte315((float) (1 / Math.sqrt(numTerms)));
|
||||
}
|
||||
return SmallFloat.intToByte4(numTerms);
|
||||
}
|
||||
|
||||
// ----------------------------- Static methods ------------------------------
|
||||
|
@ -268,12 +255,10 @@ public abstract class SimilarityBase extends Similarity {
|
|||
final class BasicSimScorer extends SimScorer {
|
||||
private final BasicStats stats;
|
||||
private final NumericDocValues norms;
|
||||
private final float[] normCache;
|
||||
|
||||
BasicSimScorer(BasicStats stats, int indexCreatedVersionMajor, NumericDocValues norms) throws IOException {
|
||||
BasicSimScorer(BasicStats stats, NumericDocValues norms) throws IOException {
|
||||
this.stats = stats;
|
||||
this.norms = norms;
|
||||
this.normCache = indexCreatedVersionMajor >= 7 ? LENGTH_TABLE : OLD_LENGTH_TABLE;
|
||||
}
|
||||
|
||||
float getLengthValue(int doc) throws IOException {
|
||||
|
@ -281,7 +266,7 @@ public abstract class SimilarityBase extends Similarity {
|
|||
return 1F;
|
||||
}
|
||||
if (norms.advanceExact(doc)) {
|
||||
return normCache[Byte.toUnsignedInt((byte) norms.longValue())];
|
||||
return LENGTH_TABLE[Byte.toUnsignedInt((byte) norms.longValue())];
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -376,15 +376,6 @@ import org.apache.lucene.util.SmallFloat;
|
|||
*/
|
||||
public abstract class TFIDFSimilarity extends Similarity {
|
||||
|
||||
/** Cache of decoded bytes. */
|
||||
static final float[] OLD_NORM_TABLE = new float[256];
|
||||
|
||||
static {
|
||||
for (int i = 0; i < 256; i++) {
|
||||
OLD_NORM_TABLE[i] = SmallFloat.byte315ToFloat((byte)i);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Sole constructor. (For invocation by subclass
|
||||
* constructors, typically implicit.)
|
||||
|
@ -516,11 +507,7 @@ public abstract class TFIDFSimilarity extends Similarity {
|
|||
numTerms = state.getLength() - state.getNumOverlap();
|
||||
else
|
||||
numTerms = state.getLength();
|
||||
if (state.getIndexCreatedVersionMajor() >= 7) {
|
||||
return SmallFloat.intToByte4(numTerms);
|
||||
} else {
|
||||
return SmallFloat.floatToByte315(lengthNorm(numTerms));
|
||||
}
|
||||
return SmallFloat.intToByte4(numTerms);
|
||||
}
|
||||
|
||||
/** Computes the amount of a sloppy phrase match, based on an edit distance.
|
||||
|
@ -569,14 +556,8 @@ public abstract class TFIDFSimilarity extends Similarity {
|
|||
@Override
|
||||
public final SimScorer simScorer(SimWeight stats, LeafReaderContext context) throws IOException {
|
||||
IDFStats idfstats = (IDFStats) stats;
|
||||
final float[] normTable;
|
||||
if (context.reader().getMetaData().getCreatedVersionMajor() >= 7) {
|
||||
// the norms only encode the length, we need a translation table that depends on how lengthNorm is implemented
|
||||
normTable = idfstats.normTable;
|
||||
} else {
|
||||
// the norm is directly encoded in the index
|
||||
normTable = OLD_NORM_TABLE;
|
||||
}
|
||||
// the norms only encode the length, we need a translation table that depends on how lengthNorm is implemented
|
||||
final float[] normTable = idfstats.normTable;
|
||||
return new TFIDFSimScorer(idfstats, context.reader().getNormValues(idfstats.field), normTable);
|
||||
}
|
||||
|
||||
|
|
|
@ -32,19 +32,6 @@ import java.util.Locale;
|
|||
*/
|
||||
public final class Version {
|
||||
|
||||
/** Match settings and bugs in Lucene's 6.0 release.
|
||||
* @deprecated (7.0.0) Use latest
|
||||
*/
|
||||
@Deprecated
|
||||
public static final Version LUCENE_6_0_0 = new Version(6, 0, 0);
|
||||
|
||||
/**
|
||||
* Match settings and bugs in Lucene's 6.5.0 release.
|
||||
* @deprecated Use latest
|
||||
*/
|
||||
@Deprecated
|
||||
public static final Version LUCENE_6_5_0 = new Version(6, 5, 0);
|
||||
|
||||
/**
|
||||
* Match settings and bugs in Lucene's 7.0.0 release.
|
||||
* @deprecated (8.0.0) Use latest
|
||||
|
|
|
@ -17,24 +17,8 @@
|
|||
package org.apache.lucene.search.similarities;
|
||||
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.IntStream;
|
||||
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field.Store;
|
||||
import org.apache.lucene.document.TextField;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.SegmentInfos;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.Explanation;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.Version;
|
||||
|
||||
public class TestBM25Similarity extends LuceneTestCase {
|
||||
|
||||
|
|
|
@ -19,29 +19,24 @@ package org.apache.lucene.search.similarities;
|
|||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.IntStream;
|
||||
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.StringField;
|
||||
import org.apache.lucene.document.TextField;
|
||||
import org.apache.lucene.document.Field.Store;
|
||||
import org.apache.lucene.document.StringField;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.FieldInvertState;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.MultiReader;
|
||||
import org.apache.lucene.index.SegmentInfos;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.BooleanClause.Occur;
|
||||
import org.apache.lucene.search.BooleanQuery;
|
||||
import org.apache.lucene.search.DisjunctionMaxQuery;
|
||||
import org.apache.lucene.search.Explanation;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.search.TopDocs;
|
||||
import org.apache.lucene.search.similarities.TFIDFSimilarity.IDFStats;
|
||||
import org.apache.lucene.search.BooleanClause.Occur;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
|
@ -163,16 +158,6 @@ public class TestClassicSimilarity extends LuceneTestCase {
|
|||
|
||||
public void testSaneNormValues() throws IOException {
|
||||
ClassicSimilarity sim = new ClassicSimilarity();
|
||||
for (int i = 0; i < 256; i++) {
|
||||
float boost = TFIDFSimilarity.OLD_NORM_TABLE[i];
|
||||
assertFalse("negative boost: " + boost + ", byte=" + i, boost < 0.0f);
|
||||
assertFalse("inf bost: " + boost + ", byte=" + i, Float.isInfinite(boost));
|
||||
assertFalse("nan boost for byte=" + i, Float.isNaN(boost));
|
||||
if (i > 0) {
|
||||
assertTrue("boost is not increasing: " + boost + ",byte=" + i, boost > TFIDFSimilarity.OLD_NORM_TABLE[i-1]);
|
||||
}
|
||||
}
|
||||
|
||||
TFIDFSimilarity.IDFStats stats = (IDFStats) sim.computeWeight(1f, new IndexSearcher(new MultiReader()).collectionStatistics("foo"));
|
||||
for (int i = 0; i < 256; i++) {
|
||||
float boost = stats.normTable[i];
|
||||
|
@ -185,20 +170,6 @@ public class TestClassicSimilarity extends LuceneTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
private static Explanation findExplanation(Explanation expl, String text) {
|
||||
if (expl.getDescription().startsWith(text)) {
|
||||
return expl;
|
||||
} else {
|
||||
for (Explanation sub : expl.getDetails()) {
|
||||
Explanation match = findExplanation(sub, text);
|
||||
if (match != null) {
|
||||
return match;
|
||||
}
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
public void testSameNormsAsBM25() {
|
||||
ClassicSimilarity sim1 = new ClassicSimilarity();
|
||||
BM25Similarity sim2 = new BM25Similarity();
|
||||
|
|
|
@ -20,23 +20,16 @@ package org.apache.lucene.search.similarities;
|
|||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.IntStream;
|
||||
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.FieldType;
|
||||
import org.apache.lucene.document.TextField;
|
||||
import org.apache.lucene.document.Field.Store;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.FieldInvertState;
|
||||
import org.apache.lucene.index.IndexOptions;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
import org.apache.lucene.index.SegmentInfos;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.TermContext;
|
||||
import org.apache.lucene.search.CollectionStatistics;
|
||||
import org.apache.lucene.search.Explanation;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
|
@ -44,14 +37,11 @@ import org.apache.lucene.search.Query;
|
|||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.search.TermStatistics;
|
||||
import org.apache.lucene.search.TopDocs;
|
||||
import org.apache.lucene.search.similarities.Similarity.SimWeight;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.Version;
|
||||
|
||||
import com.carrotsearch.randomizedtesting.generators.RandomPicks;
|
||||
|
||||
/**
|
||||
* Tests the {@link SimilarityBase}-based Similarities. Contains unit tests and
|
||||
* integration tests for all Similarities and correctness tests for a select
|
||||
|
|
|
@ -334,9 +334,9 @@ public final class TestUtil {
|
|||
CheckIndex.testLiveDocs(codecReader, infoStream, true);
|
||||
CheckIndex.testFieldInfos(codecReader, infoStream, true);
|
||||
CheckIndex.testFieldNorms(codecReader, infoStream, true);
|
||||
CheckIndex.testPostings(codecReader, infoStream, false, true, Version.LUCENE_7_0_0);
|
||||
CheckIndex.testPostings(codecReader, infoStream, false, true);
|
||||
CheckIndex.testStoredFields(codecReader, infoStream, true);
|
||||
CheckIndex.testTermVectors(codecReader, infoStream, false, crossCheckTermVectors, true, Version.LUCENE_7_0_0);
|
||||
CheckIndex.testTermVectors(codecReader, infoStream, false, crossCheckTermVectors, true);
|
||||
CheckIndex.testDocValues(codecReader, infoStream, true);
|
||||
CheckIndex.testPoints(codecReader, infoStream, true);
|
||||
|
||||
|
|
|
@ -421,10 +421,10 @@ public final class FieldTypePluginLoader
|
|||
Version version = (configuredVersion != null) ?
|
||||
Config.parseLuceneVersionString(configuredVersion) : schema.getDefaultLuceneMatchVersion();
|
||||
|
||||
if (!version.onOrAfter(Version.LUCENE_6_0_0)) {
|
||||
if (!version.onOrAfter(Version.LUCENE_7_0_0)) {
|
||||
log.warn(pluginClassName + " is using deprecated " + version +
|
||||
" emulation. You should at some point declare and reindex to at least 6.0, because " +
|
||||
"5.x emulation is deprecated and will be removed in 7.0");
|
||||
" emulation. You should at some point declare and reindex to at least 7.0, because " +
|
||||
"6.x emulation is deprecated and will be removed in 8.0");
|
||||
}
|
||||
return version;
|
||||
}
|
||||
|
|
|
@ -68,7 +68,6 @@ import org.apache.solr.core.SolrResourceLoader;
|
|||
import org.apache.solr.request.LocalSolrQueryRequest;
|
||||
import org.apache.solr.response.SchemaXmlWriter;
|
||||
import org.apache.solr.response.SolrQueryResponse;
|
||||
import org.apache.solr.search.similarities.ClassicSimilarityFactory;
|
||||
import org.apache.solr.search.similarities.SchemaSimilarityFactory;
|
||||
import org.apache.solr.util.DOMUtil;
|
||||
import org.apache.solr.util.plugin.SolrCoreAware;
|
||||
|
@ -475,8 +474,7 @@ public class IndexSchema {
|
|||
Node node = (Node) xpath.evaluate(expression, document, XPathConstants.NODE);
|
||||
similarityFactory = readSimilarity(loader, node);
|
||||
if (similarityFactory == null) {
|
||||
final boolean modernSim = getDefaultLuceneMatchVersion().onOrAfter(Version.LUCENE_6_0_0);
|
||||
final Class simClass = modernSim ? SchemaSimilarityFactory.class : ClassicSimilarityFactory.class;
|
||||
final Class<?> simClass = SchemaSimilarityFactory.class;
|
||||
// use the loader to ensure proper SolrCoreAware handling
|
||||
similarityFactory = loader.newInstance(simClass.getName(), SimilarityFactory.class);
|
||||
similarityFactory.init(new ModifiableSolrParams());
|
||||
|
|
|
@ -15,7 +15,7 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.schema;
|
||||
import org.apache.lucene.util.Version;
|
||||
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.SolrException.ErrorCode;
|
||||
import org.apache.solr.core.PluginInfo;
|
||||
|
@ -65,12 +65,7 @@ public abstract class IndexSchemaFactory implements NamedListInitializedPlugin {
|
|||
factory = config.getResourceLoader().newInstance(info.className, IndexSchemaFactory.class);
|
||||
factory.init(info.initArgs);
|
||||
} else {
|
||||
if (config.luceneMatchVersion.onOrAfter(Version.LUCENE_6_0_0)) {
|
||||
// ManagedIndexSchemaFactory is SolrCoreAware so we must create using the resource loader
|
||||
factory = config.getResourceLoader().newInstance(ManagedIndexSchemaFactory.class.getName(), IndexSchemaFactory.class);
|
||||
} else {
|
||||
factory = new ClassicIndexSchemaFactory();
|
||||
}
|
||||
factory = config.getResourceLoader().newInstance(ManagedIndexSchemaFactory.class.getName(), IndexSchemaFactory.class);
|
||||
}
|
||||
IndexSchema schema = factory.create(resourceName, config);
|
||||
return schema;
|
||||
|
|
|
@ -114,9 +114,7 @@ public class SchemaSimilarityFactory extends SimilarityFactory implements SolrCo
|
|||
Similarity defaultSim = null;
|
||||
if (null == defaultSimFromFieldType) {
|
||||
// nothing configured, choose a sensible implicit default...
|
||||
defaultSim = this.core.getSolrConfig().luceneMatchVersion.onOrAfter(Version.LUCENE_6_0_0)
|
||||
? new BM25Similarity()
|
||||
: new ClassicSimilarity();
|
||||
defaultSim = new BM25Similarity();
|
||||
} else {
|
||||
FieldType defSimFT = core.getLatestSchema().getFieldTypeByName(defaultSimFromFieldType);
|
||||
if (null == defSimFT) {
|
||||
|
|
|
@ -35,7 +35,7 @@
|
|||
that you fully re-index after changing this setting as it can
|
||||
affect both how text is indexed and queried.
|
||||
-->
|
||||
<luceneMatchVersion>7.0.0</luceneMatchVersion>
|
||||
<luceneMatchVersion>8.0.0</luceneMatchVersion>
|
||||
|
||||
<!-- <lib/> directives can be used to instruct Solr to load any Jars
|
||||
identified and use them to resolve any "plugins" specified in
|
||||
|
|
|
@ -16,7 +16,6 @@
|
|||
*/
|
||||
package org.apache.solr.search.similarities;
|
||||
|
||||
import org.apache.lucene.search.similarities.ClassicSimilarity;
|
||||
import org.apache.lucene.search.similarities.BM25Similarity;
|
||||
import org.junit.After;
|
||||
|
||||
|
@ -40,13 +39,4 @@ public class TestNonDefinedSimilarityFactory extends BaseSimilarityTestCase {
|
|||
BM25Similarity sim = getSimilarity("text", BM25Similarity.class);
|
||||
assertEquals(0.75F, sim.getB(), 0.0F);
|
||||
}
|
||||
|
||||
public void testClassic() throws Exception {
|
||||
// any value below 6.0 should have this behavior
|
||||
System.setProperty("tests.luceneMatchVersion", "5.3");
|
||||
initCore("solrconfig-basic.xml","schema-tiny.xml");
|
||||
ClassicSimilarity sim = getSimilarity("text", ClassicSimilarity.class);
|
||||
assertEquals(true, sim.getDiscountOverlaps());
|
||||
System.clearProperty("tests.luceneMatchVersion");
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue