mirror of https://github.com/apache/lucene.git
LUCENE-2236: per-field similarity
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1062927 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
25f5488fd2
commit
3f255f6cea
|
@ -131,6 +131,9 @@ Changes in backwards compatibility policy
|
|||
* LUCENE-2882: Cut over SpanQuery#getSpans to AtomicReaderContext to enforce
|
||||
per segment semantics on SpanQuery & Spans. (Simon Willnauer)
|
||||
|
||||
* LUCENE-2236: Similarity can now be configured on a per-field basis. See the
|
||||
migration notes in MIGRATE.txt for more details. (Robert Muir, Doron Cohen)
|
||||
|
||||
Changes in Runtime Behavior
|
||||
|
||||
* LUCENE-2846: omitNorms now behaves like omitTermFrequencyAndPositions, if you
|
||||
|
|
|
@ -331,3 +331,9 @@ LUCENE-1458, LUCENE-2111: Flexible Indexing
|
|||
toString() is no longer implemented by AttributeImpl, so if you have overridden
|
||||
toString(), port your customization over to reflectWith(). reflectAsString() would
|
||||
then return what toString() did before.
|
||||
|
||||
* LUCENE-2236: The default Similarity can no longer be set statically (and dangerously) for the entire JVM via Similarity.setDefault().
|
||||
Instead, IndexWriterConfig and IndexSearcher now take a SimilarityProvider.
|
||||
Similarity can now be configured on a per-field basis.
|
||||
Similarity retains only the field-specific relevance methods such as tf() and idf().
|
||||
Methods that apply to the entire query such as coord() and queryNorm() exist in SimilarityProvider.
|
||||
|
|
|
@ -42,7 +42,8 @@ import org.apache.lucene.index.FieldInvertState;
|
|||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.TermVectorOffsetInfo;
|
||||
import org.apache.lucene.search.Similarity;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.SimilarityProvider;
|
||||
import org.apache.lucene.util.StringHelper;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.CollectionUtil;
|
||||
|
@ -67,7 +68,7 @@ public class InstantiatedIndexWriter implements Closeable {
|
|||
private final InstantiatedIndex index;
|
||||
private final Analyzer analyzer;
|
||||
|
||||
private Similarity similarity = Similarity.getDefault(); // how to normalize;
|
||||
private SimilarityProvider similarityProvider = IndexSearcher.getDefaultSimilarityProvider(); // how to normalize;
|
||||
|
||||
private transient Set<String> fieldNameBuffer;
|
||||
/**
|
||||
|
@ -236,11 +237,12 @@ public class InstantiatedIndexWriter implements Closeable {
|
|||
termsInDocument += eFieldTermDocInfoFactoriesByTermText.getValue().size();
|
||||
|
||||
if (eFieldTermDocInfoFactoriesByTermText.getKey().indexed && !eFieldTermDocInfoFactoriesByTermText.getKey().omitNorms) {
|
||||
final String fieldName = eFieldTermDocInfoFactoriesByTermText.getKey().fieldName;
|
||||
final FieldInvertState invertState = new FieldInvertState();
|
||||
invertState.setBoost(eFieldTermDocInfoFactoriesByTermText.getKey().boost * document.getDocument().getBoost());
|
||||
invertState.setLength(eFieldTermDocInfoFactoriesByTermText.getKey().fieldLength);
|
||||
final float norm = similarity.computeNorm(eFieldTermDocInfoFactoriesByTermText.getKey().fieldName, invertState);
|
||||
normsByFieldNameAndDocumentNumber.get(eFieldTermDocInfoFactoriesByTermText.getKey().fieldName)[document.getDocumentNumber()] = similarity.encodeNormValue(norm);
|
||||
final float norm = similarityProvider.get(fieldName).computeNorm(fieldName, invertState);
|
||||
normsByFieldNameAndDocumentNumber.get(fieldName)[document.getDocumentNumber()] = similarityProvider.get(fieldName).encodeNormValue(norm);
|
||||
} else {
|
||||
System.currentTimeMillis();
|
||||
}
|
||||
|
@ -659,12 +661,12 @@ public class InstantiatedIndexWriter implements Closeable {
|
|||
addDocument(doc, analyzer);
|
||||
}
|
||||
|
||||
public Similarity getSimilarity() {
|
||||
return similarity;
|
||||
public SimilarityProvider getSimilarityProvider() {
|
||||
return similarityProvider;
|
||||
}
|
||||
|
||||
public void setSimilarity(Similarity similarity) {
|
||||
this.similarity = similarity;
|
||||
public void setSimilarityProvider(SimilarityProvider similarityProvider) {
|
||||
this.similarityProvider = similarityProvider;
|
||||
}
|
||||
|
||||
public Analyzer getAnalyzer() {
|
||||
|
|
|
@ -57,6 +57,7 @@ import org.apache.lucene.search.IndexSearcher;
|
|||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.Scorer;
|
||||
import org.apache.lucene.search.Similarity;
|
||||
import org.apache.lucene.search.SimilarityProvider;
|
||||
import org.apache.lucene.store.RAMDirectory; // for javadocs
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
@ -1169,9 +1170,9 @@ public class MemoryIndex implements Serializable {
|
|||
};
|
||||
}
|
||||
|
||||
private Similarity getSimilarity() {
|
||||
if (searcher != null) return searcher.getSimilarity();
|
||||
return Similarity.getDefault();
|
||||
private SimilarityProvider getSimilarityProvider() {
|
||||
if (searcher != null) return searcher.getSimilarityProvider();
|
||||
return IndexSearcher.getDefaultSimilarityProvider();
|
||||
}
|
||||
|
||||
private void setSearcher(IndexSearcher searcher) {
|
||||
|
@ -1181,20 +1182,21 @@ public class MemoryIndex implements Serializable {
|
|||
/** performance hack: cache norms to avoid repeated expensive calculations */
|
||||
private byte[] cachedNorms;
|
||||
private String cachedFieldName;
|
||||
private Similarity cachedSimilarity;
|
||||
private SimilarityProvider cachedSimilarity;
|
||||
|
||||
@Override
|
||||
public byte[] norms(String fieldName) {
|
||||
byte[] norms = cachedNorms;
|
||||
Similarity sim = getSimilarity();
|
||||
SimilarityProvider sim = getSimilarityProvider();
|
||||
if (fieldName != cachedFieldName || sim != cachedSimilarity) { // not cached?
|
||||
Info info = getInfo(fieldName);
|
||||
Similarity fieldSim = sim.get(fieldName);
|
||||
int numTokens = info != null ? info.numTokens : 0;
|
||||
int numOverlapTokens = info != null ? info.numOverlapTokens : 0;
|
||||
float boost = info != null ? info.getBoost() : 1.0f;
|
||||
FieldInvertState invertState = new FieldInvertState(0, numTokens, numOverlapTokens, 0, boost);
|
||||
float n = sim.computeNorm(fieldName, invertState);
|
||||
byte norm = sim.encodeNormValue(n);
|
||||
float n = fieldSim.computeNorm(fieldName, invertState);
|
||||
byte norm = fieldSim.encodeNormValue(n);
|
||||
norms = new byte[] {norm};
|
||||
|
||||
// cache it for future reuse
|
||||
|
|
|
@ -24,6 +24,7 @@ import java.util.ArrayList;
|
|||
|
||||
import org.apache.lucene.search.DefaultSimilarity;
|
||||
import org.apache.lucene.search.Similarity;
|
||||
import org.apache.lucene.search.SimilarityProvider;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.FSDirectory;
|
||||
import org.apache.lucene.util.StringHelper;
|
||||
|
@ -57,13 +58,13 @@ public class FieldNormModifier {
|
|||
System.exit(1);
|
||||
}
|
||||
|
||||
Similarity s = null;
|
||||
SimilarityProvider s = null;
|
||||
|
||||
if (args[1].equals("-d"))
|
||||
args[1] = DefaultSimilarity.class.getName();
|
||||
|
||||
try {
|
||||
s = Class.forName(args[1]).asSubclass(Similarity.class).newInstance();
|
||||
s = Class.forName(args[1]).asSubclass(SimilarityProvider.class).newInstance();
|
||||
} catch (Exception e) {
|
||||
System.err.println("Couldn't instantiate similarity with empty constructor: " + args[1]);
|
||||
e.printStackTrace(System.err);
|
||||
|
@ -84,7 +85,7 @@ public class FieldNormModifier {
|
|||
|
||||
|
||||
private Directory dir;
|
||||
private Similarity sim;
|
||||
private SimilarityProvider sim;
|
||||
|
||||
/**
|
||||
* Constructor for code that wishes to use this class programmatically
|
||||
|
@ -93,7 +94,7 @@ public class FieldNormModifier {
|
|||
* @param d the Directory to modify
|
||||
* @param s the SimilarityProvider to use (can be null)
|
||||
*/
|
||||
public FieldNormModifier(Directory d, Similarity s) {
|
||||
public FieldNormModifier(Directory d, SimilarityProvider s) {
|
||||
dir = d;
|
||||
sim = s;
|
||||
}
|
||||
|
@ -111,7 +112,7 @@ public class FieldNormModifier {
|
|||
*/
|
||||
public void reSetNorms(String field) throws IOException {
|
||||
String fieldName = StringHelper.intern(field);
|
||||
|
||||
Similarity fieldSim = sim.get(field);
|
||||
IndexReader reader = null;
|
||||
try {
|
||||
reader = IndexReader.open(dir, false);
|
||||
|
@ -148,7 +149,7 @@ public class FieldNormModifier {
|
|||
for (int d = 0; d < termCounts.length; d++) {
|
||||
if (delDocs == null || !delDocs.get(d)) {
|
||||
invertState.setLength(termCounts[d]);
|
||||
subReader.setNorm(d, fieldName, sim.encodeNormValue(sim.computeNorm(fieldName, invertState)));
|
||||
subReader.setNorm(d, fieldName, fieldSim.encodeNormValue(fieldSim.computeNorm(fieldName, invertState)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -28,7 +28,7 @@ import org.apache.lucene.search.Collector;
|
|||
import org.apache.lucene.search.DefaultSimilarity;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.Scorer;
|
||||
import org.apache.lucene.search.Similarity;
|
||||
import org.apache.lucene.search.SimilarityProvider;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
|
@ -42,7 +42,7 @@ public class TestFieldNormModifier extends LuceneTestCase {
|
|||
public Directory store;
|
||||
|
||||
/** inverts the normal notion of lengthNorm */
|
||||
public static Similarity s = new DefaultSimilarity() {
|
||||
public static SimilarityProvider s = new DefaultSimilarity() {
|
||||
@Override
|
||||
public float computeNorm(String fieldName, FieldInvertState state) {
|
||||
return state.getBoost() * (discountOverlaps ? state.getLength() - state.getNumOverlap() : state.getLength());
|
||||
|
|
|
@ -33,7 +33,7 @@ import org.apache.lucene.search.Collector;
|
|||
import org.apache.lucene.search.DefaultSimilarity;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.Scorer;
|
||||
import org.apache.lucene.search.Similarity;
|
||||
import org.apache.lucene.search.SimilarityProvider;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
|
@ -47,7 +47,7 @@ public class TestLengthNormModifier extends LuceneTestCase {
|
|||
public Directory store;
|
||||
|
||||
/** inverts the normal notion of lengthNorm */
|
||||
public static Similarity s = new DefaultSimilarity() {
|
||||
public static SimilarityProvider s = new DefaultSimilarity() {
|
||||
@Override
|
||||
public float computeNorm(String fieldName, FieldInvertState state) {
|
||||
return state.getBoost() * (discountOverlaps ? state.getLength() - state.getNumOverlap() : state.getLength());
|
||||
|
@ -163,7 +163,7 @@ public class TestLengthNormModifier extends LuceneTestCase {
|
|||
}
|
||||
|
||||
// override the norms to be inverted
|
||||
Similarity s = new DefaultSimilarity() {
|
||||
SimilarityProvider s = new DefaultSimilarity() {
|
||||
@Override
|
||||
public float computeNorm(String fieldName, FieldInvertState state) {
|
||||
return state.getBoost() * (discountOverlaps ? state.getLength() - state.getNumOverlap() : state.getLength());
|
||||
|
|
|
@ -31,7 +31,7 @@ import org.apache.lucene.queryParser.standard.parser.EscapeQuerySyntaxImpl;
|
|||
import org.apache.lucene.search.BooleanClause;
|
||||
import org.apache.lucene.search.BooleanQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.Similarity;
|
||||
import org.apache.lucene.search.SimilarityProvider;
|
||||
import org.apache.lucene.search.BooleanQuery.TooManyClauses;
|
||||
|
||||
/**
|
||||
|
@ -41,7 +41,7 @@ import org.apache.lucene.search.BooleanQuery.TooManyClauses;
|
|||
*
|
||||
* @see BooleanQueryNodeBuilder
|
||||
* @see BooleanQuery
|
||||
* @see Similarity#coord(int, int)
|
||||
* @see SimilarityProvider#coord(int, int)
|
||||
*/
|
||||
public class StandardBooleanQueryNodeBuilder implements StandardQueryBuilder {
|
||||
|
||||
|
|
|
@ -22,14 +22,14 @@ import java.util.List;
|
|||
import org.apache.lucene.queryParser.core.nodes.BooleanQueryNode;
|
||||
import org.apache.lucene.queryParser.core.nodes.QueryNode;
|
||||
import org.apache.lucene.search.BooleanQuery;
|
||||
import org.apache.lucene.search.Similarity;
|
||||
import org.apache.lucene.search.SimilarityProvider;
|
||||
|
||||
/**
|
||||
* A {@link StandardBooleanQueryNode} has the same behavior as
|
||||
* {@link BooleanQueryNode}. It only indicates if the coord should be enabled or
|
||||
* not for this boolean query. <br/>
|
||||
*
|
||||
* @see Similarity#coord(int, int)
|
||||
* @see SimilarityProvider#coord(int, int)
|
||||
* @see BooleanQuery
|
||||
*/
|
||||
public class StandardBooleanQueryNode extends BooleanQueryNode {
|
||||
|
|
|
@ -30,7 +30,7 @@ import java.util.concurrent.atomic.AtomicLong;
|
|||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.Similarity;
|
||||
import org.apache.lucene.search.SimilarityProvider;
|
||||
import org.apache.lucene.store.AlreadyClosedException;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.RAMFile;
|
||||
|
@ -127,7 +127,7 @@ final class DocumentsWriter {
|
|||
private boolean aborting; // True if an abort is pending
|
||||
|
||||
PrintStream infoStream;
|
||||
Similarity similarity;
|
||||
SimilarityProvider similarityProvider;
|
||||
|
||||
// max # simultaneous threads; if there are more than
|
||||
// this, they wait for others to finish first
|
||||
|
@ -140,7 +140,7 @@ final class DocumentsWriter {
|
|||
DocumentsWriter docWriter;
|
||||
Analyzer analyzer;
|
||||
PrintStream infoStream;
|
||||
Similarity similarity;
|
||||
SimilarityProvider similarityProvider;
|
||||
int docID;
|
||||
Document doc;
|
||||
String maxTermPrefix;
|
||||
|
@ -284,7 +284,7 @@ final class DocumentsWriter {
|
|||
DocumentsWriter(Directory directory, IndexWriter writer, IndexingChain indexingChain, int maxThreadStates, FieldInfos fieldInfos, BufferedDeletes bufferedDeletes) throws IOException {
|
||||
this.directory = directory;
|
||||
this.writer = writer;
|
||||
this.similarity = writer.getConfig().getSimilarity();
|
||||
this.similarityProvider = writer.getConfig().getSimilarityProvider();
|
||||
this.maxThreadStates = maxThreadStates;
|
||||
this.fieldInfos = fieldInfos;
|
||||
this.bufferedDeletes = bufferedDeletes;
|
||||
|
@ -357,10 +357,10 @@ final class DocumentsWriter {
|
|||
}
|
||||
}
|
||||
|
||||
synchronized void setSimilarity(Similarity similarity) {
|
||||
this.similarity = similarity;
|
||||
synchronized void setSimilarityProvider(SimilarityProvider similarity) {
|
||||
this.similarityProvider = similarity;
|
||||
for(int i=0;i<threadStates.length;i++) {
|
||||
threadStates[i].docState.similarity = similarity;
|
||||
threadStates[i].docState.similarityProvider = similarity;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -36,7 +36,7 @@ final class DocumentsWriterThreadState {
|
|||
this.docWriter = docWriter;
|
||||
docState = new DocumentsWriter.DocState();
|
||||
docState.infoStream = docWriter.infoStream;
|
||||
docState.similarity = docWriter.similarity;
|
||||
docState.similarityProvider = docWriter.similarityProvider;
|
||||
docState.docWriter = docWriter;
|
||||
consumer = docWriter.consumer.addThread(this);
|
||||
}
|
||||
|
|
|
@ -21,7 +21,8 @@ import org.apache.lucene.analysis.Analyzer;
|
|||
import org.apache.lucene.index.DocumentsWriter.IndexingChain;
|
||||
import org.apache.lucene.index.IndexWriter.IndexReaderWarmer;
|
||||
import org.apache.lucene.index.codecs.CodecProvider;
|
||||
import org.apache.lucene.search.Similarity;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.SimilarityProvider;
|
||||
import org.apache.lucene.util.Version;
|
||||
|
||||
/**
|
||||
|
@ -111,7 +112,7 @@ public final class IndexWriterConfig implements Cloneable {
|
|||
private IndexDeletionPolicy delPolicy;
|
||||
private IndexCommit commit;
|
||||
private OpenMode openMode;
|
||||
private Similarity similarity;
|
||||
private SimilarityProvider similarityProvider;
|
||||
private int termIndexInterval; // TODO: this should be private to the codec, not settable here
|
||||
private MergeScheduler mergeScheduler;
|
||||
private long writeLockTimeout;
|
||||
|
@ -142,7 +143,7 @@ public final class IndexWriterConfig implements Cloneable {
|
|||
delPolicy = new KeepOnlyLastCommitDeletionPolicy();
|
||||
commit = null;
|
||||
openMode = OpenMode.CREATE_OR_APPEND;
|
||||
similarity = Similarity.getDefault();
|
||||
similarityProvider = IndexSearcher.getDefaultSimilarityProvider();
|
||||
termIndexInterval = DEFAULT_TERM_INDEX_INTERVAL; // TODO: this should be private to the codec, not settable here
|
||||
mergeScheduler = new ConcurrentMergeScheduler();
|
||||
writeLockTimeout = WRITE_LOCK_TIMEOUT;
|
||||
|
@ -234,25 +235,22 @@ public final class IndexWriterConfig implements Cloneable {
|
|||
}
|
||||
|
||||
/**
|
||||
* Expert: set the {@link Similarity} implementation used by this IndexWriter.
|
||||
* Expert: set the {@link SimilarityProvider} implementation used by this IndexWriter.
|
||||
* <p>
|
||||
* <b>NOTE:</b> the similarity cannot be null. If <code>null</code> is passed,
|
||||
* the similarity will be set to the default.
|
||||
*
|
||||
* @see Similarity#setDefault(Similarity)
|
||||
* <b>NOTE:</b> the similarity provider cannot be null. If <code>null</code> is passed,
|
||||
* the similarity provider will be set to the default implementation (unspecified).
|
||||
*/
|
||||
public IndexWriterConfig setSimilarity(Similarity similarity) {
|
||||
this.similarity = similarity == null ? Similarity.getDefault() : similarity;
|
||||
public IndexWriterConfig setSimilarityProvider(SimilarityProvider similarityProvider) {
|
||||
this.similarityProvider = similarityProvider == null ? IndexSearcher.getDefaultSimilarityProvider() : similarityProvider;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Expert: returns the {@link Similarity} implementation used by this
|
||||
* IndexWriter. This defaults to the current value of
|
||||
* {@link Similarity#getDefault()}.
|
||||
* Expert: returns the {@link SimilarityProvider} implementation used by this
|
||||
* IndexWriter.
|
||||
*/
|
||||
public Similarity getSimilarity() {
|
||||
return similarity;
|
||||
public SimilarityProvider getSimilarityProvider() {
|
||||
return similarityProvider;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -576,7 +574,7 @@ public final class IndexWriterConfig implements Cloneable {
|
|||
sb.append("delPolicy=").append(delPolicy.getClass().getName()).append("\n");
|
||||
sb.append("commit=").append(commit == null ? "null" : commit).append("\n");
|
||||
sb.append("openMode=").append(openMode).append("\n");
|
||||
sb.append("similarity=").append(similarity.getClass().getName()).append("\n");
|
||||
sb.append("similarityProvider=").append(similarityProvider.getClass().getName()).append("\n");
|
||||
sb.append("termIndexInterval=").append(termIndexInterval).append("\n"); // TODO: this should be private to the codec, not settable here
|
||||
sb.append("mergeScheduler=").append(mergeScheduler.getClass().getName()).append("\n");
|
||||
sb.append("default WRITE_LOCK_TIMEOUT=").append(WRITE_LOCK_TIMEOUT).append("\n");
|
||||
|
|
|
@ -17,6 +17,7 @@ package org.apache.lucene.index;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.search.Similarity;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
|
||||
/** Taps into DocInverter, as an InvertedDocEndConsumer,
|
||||
|
@ -29,7 +30,8 @@ final class NormsWriterPerField extends InvertedDocEndConsumerPerField implement
|
|||
final NormsWriterPerThread perThread;
|
||||
final FieldInfo fieldInfo;
|
||||
final DocumentsWriter.DocState docState;
|
||||
|
||||
final Similarity similarity;
|
||||
|
||||
// Holds all docID/norm pairs we've seen
|
||||
int[] docIDs = new int[1];
|
||||
byte[] norms = new byte[1];
|
||||
|
@ -49,6 +51,7 @@ final class NormsWriterPerField extends InvertedDocEndConsumerPerField implement
|
|||
this.fieldInfo = fieldInfo;
|
||||
docState = perThread.docState;
|
||||
fieldState = docInverterPerField.fieldState;
|
||||
similarity = docState.similarityProvider.get(fieldInfo.name);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -71,8 +74,8 @@ final class NormsWriterPerField extends InvertedDocEndConsumerPerField implement
|
|||
assert norms.length == upto;
|
||||
norms = ArrayUtil.grow(norms, 1+upto);
|
||||
}
|
||||
final float norm = docState.similarity.computeNorm(fieldInfo.name, fieldState);
|
||||
norms[upto] = docState.similarity.encodeNormValue(norm);
|
||||
final float norm = similarity.computeNorm(fieldInfo.name, fieldState);
|
||||
norms[upto] = similarity.encodeNormValue(norm);
|
||||
docIDs[upto] = docState.docID;
|
||||
upto++;
|
||||
}
|
||||
|
|
|
@ -72,18 +72,18 @@ public class BooleanQuery extends Query implements Iterable<BooleanClause> {
|
|||
|
||||
/** Constructs an empty boolean query.
|
||||
*
|
||||
* {@link Similarity#coord(int,int)} may be disabled in scoring, as
|
||||
* {@link SimilarityProvider#coord(int,int)} may be disabled in scoring, as
|
||||
* appropriate. For example, this score factor does not make sense for most
|
||||
* automatically generated queries, like {@link WildcardQuery} and {@link
|
||||
* FuzzyQuery}.
|
||||
*
|
||||
* @param disableCoord disables {@link Similarity#coord(int,int)} in scoring.
|
||||
* @param disableCoord disables {@link SimilarityProvider#coord(int,int)} in scoring.
|
||||
*/
|
||||
public BooleanQuery(boolean disableCoord) {
|
||||
this.disableCoord = disableCoord;
|
||||
}
|
||||
|
||||
/** Returns true iff {@link Similarity#coord(int,int)} is disabled in
|
||||
/** Returns true iff {@link SimilarityProvider#coord(int,int)} is disabled in
|
||||
* scoring for this query instance.
|
||||
* @see #BooleanQuery(boolean)
|
||||
*/
|
||||
|
@ -162,14 +162,14 @@ public class BooleanQuery extends Query implements Iterable<BooleanClause> {
|
|||
*/
|
||||
protected class BooleanWeight extends Weight {
|
||||
/** The SimilarityProvider implementation. */
|
||||
protected Similarity similarity;
|
||||
protected SimilarityProvider similarityProvider;
|
||||
protected ArrayList<Weight> weights;
|
||||
protected int maxCoord; // num optional + num required
|
||||
private final boolean disableCoord;
|
||||
|
||||
public BooleanWeight(IndexSearcher searcher, boolean disableCoord)
|
||||
throws IOException {
|
||||
this.similarity = searcher.getSimilarity();
|
||||
this.similarityProvider = searcher.getSimilarityProvider();
|
||||
this.disableCoord = disableCoord;
|
||||
weights = new ArrayList<Weight>(clauses.size());
|
||||
for (int i = 0 ; i < clauses.size(); i++) {
|
||||
|
@ -202,7 +202,7 @@ public class BooleanQuery extends Query implements Iterable<BooleanClause> {
|
|||
}
|
||||
|
||||
public float coord(int overlap, int maxOverlap) {
|
||||
return similarity.coord(overlap, maxOverlap);
|
||||
return similarityProvider.coord(overlap, maxOverlap);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -20,7 +20,7 @@ import org.apache.lucene.index.FieldInvertState;
|
|||
*/
|
||||
|
||||
/** Expert: Default scoring implementation. */
|
||||
public class DefaultSimilarity extends Similarity {
|
||||
public class DefaultSimilarity extends Similarity implements SimilarityProvider {
|
||||
|
||||
/** Implemented as
|
||||
* <code>state.getBoost()*lengthNorm(numTerms)</code>, where
|
||||
|
@ -41,7 +41,6 @@ public class DefaultSimilarity extends Similarity {
|
|||
}
|
||||
|
||||
/** Implemented as <code>1/sqrt(sumOfSquaredWeights)</code>. */
|
||||
@Override
|
||||
public float queryNorm(float sumOfSquaredWeights) {
|
||||
return (float)(1.0 / Math.sqrt(sumOfSquaredWeights));
|
||||
}
|
||||
|
@ -65,7 +64,6 @@ public class DefaultSimilarity extends Similarity {
|
|||
}
|
||||
|
||||
/** Implemented as <code>overlap / maxOverlap</code>. */
|
||||
@Override
|
||||
public float coord(int overlap, int maxOverlap) {
|
||||
return overlap / (float)maxOverlap;
|
||||
}
|
||||
|
@ -90,4 +88,12 @@ public class DefaultSimilarity extends Similarity {
|
|||
public boolean getDiscountOverlaps() {
|
||||
return discountOverlaps;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns this default implementation for all fields.
|
||||
* Override this method to customize scoring on a per-field basis.
|
||||
*/
|
||||
public Similarity get(String field) {
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -70,8 +70,22 @@ public class IndexSearcher {
|
|||
private final ExecutorService executor;
|
||||
protected final IndexSearcher[] subSearchers;
|
||||
|
||||
/** The Similarity implementation used by this searcher. */
|
||||
private Similarity similarity = Similarity.getDefault();
|
||||
// the default SimilarityProvider
|
||||
private static final SimilarityProvider defaultProvider = new DefaultSimilarity();
|
||||
|
||||
/**
|
||||
* Expert: returns a default SimilarityProvider instance.
|
||||
* In general, this method is only called to initialize searchers and writers.
|
||||
* User code and query implementations should respect
|
||||
* {@link IndexSearcher#getSimilarityProvider()}.
|
||||
* @lucene.internal
|
||||
*/
|
||||
public static SimilarityProvider getDefaultSimilarityProvider() {
|
||||
return defaultProvider;
|
||||
}
|
||||
|
||||
/** The SimilarityProvider implementation used by this searcher. */
|
||||
private SimilarityProvider similarityProvider = defaultProvider;
|
||||
|
||||
/** Creates a searcher searching the index in the named
|
||||
* directory, with readOnly=true
|
||||
|
@ -248,16 +262,15 @@ public class IndexSearcher {
|
|||
return reader.document(docID, fieldSelector);
|
||||
}
|
||||
|
||||
/** Expert: Set the Similarity implementation used by this Searcher.
|
||||
/** Expert: Set the SimilarityProvider implementation used by this Searcher.
|
||||
*
|
||||
* @see Similarity#setDefault(Similarity)
|
||||
*/
|
||||
public void setSimilarity(Similarity similarity) {
|
||||
this.similarity = similarity;
|
||||
public void setSimilarityProvider(SimilarityProvider similarityProvider) {
|
||||
this.similarityProvider = similarityProvider;
|
||||
}
|
||||
|
||||
public Similarity getSimilarity() {
|
||||
return similarity;
|
||||
public SimilarityProvider getSimilarityProvider() {
|
||||
return similarityProvider;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -98,7 +98,7 @@ public class MatchAllDocsQuery extends Query {
|
|||
private float queryNorm;
|
||||
|
||||
public MatchAllDocsWeight(IndexSearcher searcher) {
|
||||
this.similarity = searcher.getSimilarity();
|
||||
this.similarity = normsField == null ? null : searcher.getSimilarityProvider().get(normsField);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -139,7 +139,7 @@ public class MultiPhraseQuery extends Query {
|
|||
|
||||
public MultiPhraseWeight(IndexSearcher searcher)
|
||||
throws IOException {
|
||||
this.similarity = searcher.getSimilarity();
|
||||
this.similarity = searcher.getSimilarityProvider().get(field);
|
||||
|
||||
// compute idf
|
||||
ArrayList<Term> allTerms = new ArrayList<Term>();
|
||||
|
|
|
@ -146,7 +146,7 @@ public class PhraseQuery extends Query {
|
|||
|
||||
public PhraseWeight(IndexSearcher searcher)
|
||||
throws IOException {
|
||||
this.similarity = searcher.getSimilarity();
|
||||
this.similarity = searcher.getSimilarityProvider().get(field);
|
||||
|
||||
idfExp = similarity.idfExplain(terms, searcher);
|
||||
idf = idfExp.getIdf();
|
||||
|
|
|
@ -98,7 +98,7 @@ public abstract class Query implements java.io.Serializable, Cloneable {
|
|||
Query query = searcher.rewrite(this);
|
||||
Weight weight = query.createWeight(searcher);
|
||||
float sum = weight.sumOfSquaredWeights();
|
||||
float norm = searcher.getSimilarity().queryNorm(sum);
|
||||
float norm = searcher.getSimilarityProvider().queryNorm(sum);
|
||||
if (Float.isInfinite(norm) || Float.isNaN(norm))
|
||||
norm = 1.0f;
|
||||
weight.normalize(norm);
|
||||
|
|
|
@ -362,7 +362,7 @@ import org.apache.lucene.util.SmallFloat;
|
|||
* Typically, a document that contains more of the query's terms will receive a higher score
|
||||
* than another document with fewer query terms.
|
||||
* This is a search time factor computed in
|
||||
* {@link #coord(int, int) coord(q,d)}
|
||||
* {@link SimilarityProvider#coord(int, int) coord(q,d)}
|
||||
* by the SimilarityProvider in effect at search time.
|
||||
* <br> <br>
|
||||
* </li>
|
||||
|
@ -522,40 +522,13 @@ import org.apache.lucene.util.SmallFloat;
|
|||
* </li>
|
||||
* </ol>
|
||||
*
|
||||
* @see #setDefault(Similarity)
|
||||
* @see org.apache.lucene.index.IndexWriterConfig#setSimilarity(Similarity)
|
||||
* @see IndexSearcher#setSimilarity(Similarity)
|
||||
* @see org.apache.lucene.index.IndexWriterConfig#setSimilarityProvider(SimilarityProvider)
|
||||
* @see IndexSearcher#setSimilarityProvider(SimilarityProvider)
|
||||
*/
|
||||
public abstract class Similarity implements Serializable {
|
||||
|
||||
/**
|
||||
* The Similarity implementation used by default.
|
||||
**/
|
||||
private static Similarity defaultImpl = new DefaultSimilarity();
|
||||
public static final int NO_DOC_ID_PROVIDED = -1;
|
||||
|
||||
/** Set the default Similarity implementation used by indexing and search
|
||||
* code.
|
||||
*
|
||||
* @see IndexSearcher#setSimilarity(Similarity)
|
||||
* @see org.apache.lucene.index.IndexWriterConfig#setSimilarity(Similarity)
|
||||
*/
|
||||
public static void setDefault(Similarity similarity) {
|
||||
Similarity.defaultImpl = similarity;
|
||||
}
|
||||
|
||||
/** Return the default Similarity implementation used by indexing and search
|
||||
* code.
|
||||
*
|
||||
* <p>This is initially an instance of {@link DefaultSimilarity}.
|
||||
*
|
||||
* @see IndexSearcher#setSimilarity(Similarity)
|
||||
* @see org.apache.lucene.index.IndexWriterConfig#setSimilarity(Similarity)
|
||||
*/
|
||||
public static Similarity getDefault() {
|
||||
return Similarity.defaultImpl;
|
||||
}
|
||||
|
||||
/** Cache of decoded bytes. */
|
||||
private static final float[] NORM_TABLE = new float[256];
|
||||
|
||||
|
@ -632,21 +605,6 @@ public abstract class Similarity implements Serializable {
|
|||
throw new UnsupportedOperationException("please use computeNorm instead");
|
||||
}
|
||||
|
||||
/** Computes the normalization value for a query given the sum of the squared
|
||||
* weights of each of the query terms. This value is multiplied into the
|
||||
* weight of each query term. While the classic query normalization factor is
|
||||
* computed as 1/sqrt(sumOfSquaredWeights), other implementations might
|
||||
* completely ignore sumOfSquaredWeights (ie return 1).
|
||||
*
|
||||
* <p>This does not affect ranking, but the default implementation does make scores
|
||||
* from different queries more comparable than they would be by eliminating the
|
||||
* magnitude of the Query vector as a factor in the score.
|
||||
*
|
||||
* @param sumOfSquaredWeights the sum of the squares of query term weights
|
||||
* @return a normalization factor for query weights
|
||||
*/
|
||||
public abstract float queryNorm(float sumOfSquaredWeights);
|
||||
|
||||
/** Encodes a normalization factor for storage in an index.
|
||||
*
|
||||
* <p>The encoding uses a three-bit mantissa, a five-bit exponent, and
|
||||
|
@ -816,20 +774,6 @@ public abstract class Similarity implements Serializable {
|
|||
*/
|
||||
public abstract float idf(int docFreq, int numDocs);
|
||||
|
||||
/** Computes a score factor based on the fraction of all query terms that a
|
||||
* document contains. This value is multiplied into scores.
|
||||
*
|
||||
* <p>The presence of a large portion of the query terms indicates a better
|
||||
* match with the query, so implementations of this method usually return
|
||||
* larger values when the ratio between these parameters is large and smaller
|
||||
* values when the ratio between them is small.
|
||||
*
|
||||
* @param overlap the number of query terms matched in the document
|
||||
* @param maxOverlap the total number of terms in the query
|
||||
* @return a score factor based on term overlap with the query
|
||||
*/
|
||||
public abstract float coord(int overlap, int maxOverlap);
|
||||
|
||||
/**
|
||||
* Calculate a scoring factor based on the data in the payload. Overriding implementations
|
||||
* are responsible for interpreting what is in the payload. Lucene makes no assumptions about
|
||||
|
|
|
@ -0,0 +1,66 @@
|
|||
package org.apache.lucene.search;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Expert: Scoring API.
|
||||
*
|
||||
* <p>Provides top-level scoring functions that aren't specific to a field,
|
||||
* and work across multi-field queries (such as {@link BooleanQuery}).
|
||||
*
|
||||
* <p>Field-specific scoring is accomplished through the {@link Similarity} returned by {@link #get(String)}.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public interface SimilarityProvider {
|
||||
|
||||
/** Computes a score factor based on the fraction of all query terms that a
|
||||
* document contains. This value is multiplied into scores.
|
||||
*
|
||||
* <p>The presence of a large portion of the query terms indicates a better
|
||||
* match with the query, so implementations of this method usually return
|
||||
* larger values when the ratio between these parameters is large and smaller
|
||||
* values when the ratio between them is small.
|
||||
*
|
||||
* @param overlap the number of query terms matched in the document
|
||||
|
||||
* @param maxOverlap the total number of terms in the query
|
||||
* @return a score factor based on term overlap with the query
|
||||
*/
|
||||
public abstract float coord(int overlap, int maxOverlap);
|
||||
|
||||
/** Computes the normalization value for a query given the sum of the squared
|
||||
* weights of each of the query terms. This value is multiplied into the
|
||||
* weight of each query term. While the classic query normalization factor is
|
||||
* computed as 1/sqrt(sumOfSquaredWeights), other implementations might
|
||||
* completely ignore sumOfSquaredWeights (i.e. return 1).
|
||||
*
|
||||
* <p>This does not affect ranking, but the default implementation does make scores
|
||||
* from different queries more comparable than they would be by eliminating the
|
||||
* magnitude of the Query vector as a factor in the score.
|
||||
*
|
||||
* @param sumOfSquaredWeights the sum of the squares of query term weights
|
||||
* @return a normalization factor for query weights
|
||||
*/
|
||||
public abstract float queryNorm(float sumOfSquaredWeights);
|
||||
|
||||
/** Returns the {@link Similarity} to use for scoring the given field.
|
||||
* @param field field name.
|
||||
* @return a field-specific Similarity.
|
||||
*/
|
||||
public abstract Similarity get(String field);
|
||||
}
|
|
@ -54,7 +54,7 @@ public class TermQuery extends Query {
|
|||
throws IOException {
|
||||
assert termStates != null : "PerReaderTermState must not be null";
|
||||
this.termStates = termStates;
|
||||
this.similarity = searcher.getSimilarity();
|
||||
this.similarity = searcher.getSimilarityProvider().get(term.field());
|
||||
if (docFreq != -1) {
|
||||
idfExp = similarity.idfExplain(term, searcher, docFreq);
|
||||
} else {
|
||||
|
|
|
@ -44,7 +44,7 @@ import org.apache.lucene.index.IndexReader.ReaderContext;
|
|||
* <code>IndexSearcher</code> ({@link Query#createWeight(IndexSearcher)}).
|
||||
* <li>The {@link #sumOfSquaredWeights()} method is called on the
|
||||
* <code>Weight</code> to compute the query normalization factor
|
||||
* {@link Similarity#queryNorm(float)} of the query clauses contained in the
|
||||
* {@link SimilarityProvider#queryNorm(float)} of the query clauses contained in the
|
||||
* query.
|
||||
* <li>The query normalization factor is passed to {@link #normalize(float)}. At
|
||||
* this point the weighting is complete.
|
||||
|
|
|
@ -42,7 +42,7 @@ public class SpanWeight extends Weight {
|
|||
|
||||
public SpanWeight(SpanQuery query, IndexSearcher searcher)
|
||||
throws IOException {
|
||||
this.similarity = searcher.getSimilarity();
|
||||
this.similarity = searcher.getSimilarityProvider().get(query.getField());
|
||||
this.query = query;
|
||||
|
||||
terms=new HashSet<Term>();
|
||||
|
|
|
@ -28,7 +28,7 @@ import org.apache.lucene.analysis.MockTokenizer;
|
|||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.Fieldable;
|
||||
import org.apache.lucene.search.Similarity;
|
||||
import org.apache.lucene.search.SimilarityProvider;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import static org.apache.lucene.util.LuceneTestCase.TEST_VERSION_CURRENT;
|
||||
|
||||
|
@ -220,7 +220,7 @@ class DocHelper {
|
|||
*/
|
||||
public static SegmentInfo writeDoc(Directory dir, Document doc) throws IOException
|
||||
{
|
||||
return writeDoc(dir, new MockAnalyzer(MockTokenizer.WHITESPACE, false), Similarity.getDefault(), doc);
|
||||
return writeDoc(dir, new MockAnalyzer(MockTokenizer.WHITESPACE, false), null, doc);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -233,9 +233,9 @@ class DocHelper {
|
|||
* @param doc
|
||||
* @throws IOException
|
||||
*/
|
||||
public static SegmentInfo writeDoc(Directory dir, Analyzer analyzer, Similarity similarity, Document doc) throws IOException {
|
||||
public static SegmentInfo writeDoc(Directory dir, Analyzer analyzer, SimilarityProvider similarity, Document doc) throws IOException {
|
||||
IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(
|
||||
TEST_VERSION_CURRENT, analyzer).setSimilarity(similarity));
|
||||
TEST_VERSION_CURRENT, analyzer).setSimilarityProvider(similarity));
|
||||
//writer.setUseCompoundFile(false);
|
||||
writer.addDocument(doc);
|
||||
writer.commit();
|
||||
|
|
|
@ -38,12 +38,13 @@ import org.apache.lucene.document.Field;
|
|||
import org.apache.lucene.document.Fieldable;
|
||||
import org.apache.lucene.document.NumericField;
|
||||
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
|
||||
import org.apache.lucene.search.DefaultSimilarity;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.search.FieldCache;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.NumericRangeQuery;
|
||||
import org.apache.lucene.search.ScoreDoc;
|
||||
import org.apache.lucene.search.Similarity;
|
||||
import org.apache.lucene.search.SimilarityProvider;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.Bits;
|
||||
|
@ -412,7 +413,7 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
|
|||
Term searchTerm = new Term("id", "6");
|
||||
int delCount = reader.deleteDocuments(searchTerm);
|
||||
assertEquals("wrong delete count", 1, delCount);
|
||||
reader.setNorm(searcher.search(new TermQuery(new Term("id", "22")), 10).scoreDocs[0].doc, "content", Similarity.getDefault().encodeNormValue(2.0f));
|
||||
reader.setNorm(searcher.search(new TermQuery(new Term("id", "22")), 10).scoreDocs[0].doc, "content", searcher.getSimilarityProvider().get("content").encodeNormValue(2.0f));
|
||||
reader.close();
|
||||
searcher.close();
|
||||
|
||||
|
@ -460,7 +461,7 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
|
|||
Term searchTerm = new Term("id", "6");
|
||||
int delCount = reader.deleteDocuments(searchTerm);
|
||||
assertEquals("wrong delete count", 1, delCount);
|
||||
reader.setNorm(22, "content", Similarity.getDefault().encodeNormValue(2.0f));
|
||||
reader.setNorm(22, "content", searcher.getSimilarityProvider().get("content").encodeNormValue(2.0f));
|
||||
reader.close();
|
||||
|
||||
// make sure they "took":
|
||||
|
@ -519,7 +520,7 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
|
|||
assertEquals("didn't delete the right number of documents", 1, delCount);
|
||||
|
||||
// Set one norm so we get a .s0 file:
|
||||
reader.setNorm(21, "content", Similarity.getDefault().encodeNormValue(1.5f));
|
||||
reader.setNorm(21, "content", conf.getSimilarityProvider().get("content").encodeNormValue(1.5f));
|
||||
reader.close();
|
||||
}
|
||||
|
||||
|
@ -556,7 +557,8 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
|
|||
assertEquals("didn't delete the right number of documents", 1, delCount);
|
||||
|
||||
// Set one norm so we get a .s0 file:
|
||||
reader.setNorm(21, "content", Similarity.getDefault().encodeNormValue(1.5f));
|
||||
SimilarityProvider sim = new DefaultSimilarity();
|
||||
reader.setNorm(21, "content", sim.get("content").encodeNormValue(1.5f));
|
||||
reader.close();
|
||||
|
||||
// The numbering of fields can vary depending on which
|
||||
|
|
|
@ -30,7 +30,6 @@ import org.apache.lucene.index.IndexWriterConfig.OpenMode;
|
|||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.ScoreDoc;
|
||||
import org.apache.lucene.search.Similarity;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
|
@ -608,7 +607,7 @@ public class TestDeletionPolicy extends LuceneTestCase {
|
|||
writer.close();
|
||||
IndexReader reader = IndexReader.open(dir, policy, false);
|
||||
reader.deleteDocument(3*i+1);
|
||||
reader.setNorm(4*i+1, "content", Similarity.getDefault().encodeNormValue(2.0F));
|
||||
reader.setNorm(4*i+1, "content", conf.getSimilarityProvider().get("content").encodeNormValue(2.0F));
|
||||
IndexSearcher searcher = new IndexSearcher(reader);
|
||||
ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
|
||||
assertEquals(16*(1+i), hits.length);
|
||||
|
@ -716,7 +715,7 @@ public class TestDeletionPolicy extends LuceneTestCase {
|
|||
writer.close();
|
||||
IndexReader reader = IndexReader.open(dir, policy, false);
|
||||
reader.deleteDocument(3);
|
||||
reader.setNorm(5, "content", Similarity.getDefault().encodeNormValue(2.0F));
|
||||
reader.setNorm(5, "content", conf.getSimilarityProvider().get("content").encodeNormValue(2.0F));
|
||||
IndexSearcher searcher = new IndexSearcher(reader);
|
||||
ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
|
||||
assertEquals(16, hits.length);
|
||||
|
|
|
@ -18,6 +18,7 @@ package org.apache.lucene.index;
|
|||
*/
|
||||
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.search.DefaultSimilarity;
|
||||
import org.apache.lucene.search.Similarity;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
|
@ -67,9 +68,9 @@ public class TestIndexFileDeleter extends LuceneTestCase {
|
|||
Term searchTerm = new Term("id", "7");
|
||||
int delCount = reader.deleteDocuments(searchTerm);
|
||||
assertEquals("didn't delete the right number of documents", 1, delCount);
|
||||
|
||||
Similarity sim = new DefaultSimilarity().get("content");
|
||||
// Set one norm so we get a .s0 file:
|
||||
reader.setNorm(21, "content", Similarity.getDefault().encodeNormValue(1.5f));
|
||||
reader.setNorm(21, "content", sim.encodeNormValue(1.5f));
|
||||
reader.close();
|
||||
|
||||
// Now, artificially create an extra .del file & extra
|
||||
|
|
|
@ -39,11 +39,12 @@ import org.apache.lucene.document.SetBasedFieldSelector;
|
|||
import org.apache.lucene.index.IndexReader.FieldOption;
|
||||
import org.apache.lucene.index.codecs.CodecProvider;
|
||||
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
|
||||
import org.apache.lucene.search.DefaultSimilarity;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.search.FieldCache;
|
||||
import org.apache.lucene.search.Similarity;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.ScoreDoc;
|
||||
import org.apache.lucene.search.Similarity;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.store.AlreadyClosedException;
|
||||
import org.apache.lucene.store.Directory;
|
||||
|
@ -464,8 +465,9 @@ public class TestIndexReader extends LuceneTestCase
|
|||
// expected
|
||||
}
|
||||
|
||||
Similarity sim = new DefaultSimilarity().get("aaa");
|
||||
try {
|
||||
reader.setNorm(5, "aaa", Similarity.getDefault().encodeNormValue(2.0f));
|
||||
reader.setNorm(5, "aaa", sim.encodeNormValue(2.0f));
|
||||
fail("setNorm after close failed to throw IOException");
|
||||
} catch (AlreadyClosedException e) {
|
||||
// expected
|
||||
|
@ -504,8 +506,9 @@ public class TestIndexReader extends LuceneTestCase
|
|||
// expected
|
||||
}
|
||||
|
||||
Similarity sim = new DefaultSimilarity().get("aaa");
|
||||
try {
|
||||
reader.setNorm(5, "aaa", Similarity.getDefault().encodeNormValue(2.0f));
|
||||
reader.setNorm(5, "aaa", sim.encodeNormValue(2.0f));
|
||||
fail("setNorm should have hit LockObtainFailedException");
|
||||
} catch (LockObtainFailedException e) {
|
||||
// expected
|
||||
|
@ -535,7 +538,8 @@ public class TestIndexReader extends LuceneTestCase
|
|||
|
||||
// now open reader & set norm for doc 0
|
||||
IndexReader reader = IndexReader.open(dir, false);
|
||||
reader.setNorm(0, "content", Similarity.getDefault().encodeNormValue(2.0f));
|
||||
Similarity sim = new DefaultSimilarity().get("content");
|
||||
reader.setNorm(0, "content", sim.encodeNormValue(2.0f));
|
||||
|
||||
// we should be holding the write lock now:
|
||||
assertTrue("locked", IndexWriter.isLocked(dir));
|
||||
|
@ -549,7 +553,7 @@ public class TestIndexReader extends LuceneTestCase
|
|||
IndexReader reader2 = IndexReader.open(dir, false);
|
||||
|
||||
// set norm again for doc 0
|
||||
reader.setNorm(0, "content", Similarity.getDefault().encodeNormValue(3.0f));
|
||||
reader.setNorm(0, "content", sim.encodeNormValue(3.0f));
|
||||
assertTrue("locked", IndexWriter.isLocked(dir));
|
||||
|
||||
reader.close();
|
||||
|
@ -579,15 +583,16 @@ public class TestIndexReader extends LuceneTestCase
|
|||
addDoc(writer, searchTerm.text());
|
||||
writer.close();
|
||||
|
||||
Similarity sim = new DefaultSimilarity().get("content");
|
||||
// now open reader & set norm for doc 0 (writes to
|
||||
// _0_1.s0)
|
||||
reader = IndexReader.open(dir, false);
|
||||
reader.setNorm(0, "content", Similarity.getDefault().encodeNormValue(2.0f));
|
||||
reader.setNorm(0, "content", sim.encodeNormValue(2.0f));
|
||||
reader.close();
|
||||
|
||||
// now open reader again & set norm for doc 0 (writes to _0_2.s0)
|
||||
reader = IndexReader.open(dir, false);
|
||||
reader.setNorm(0, "content", Similarity.getDefault().encodeNormValue(2.0f));
|
||||
reader.setNorm(0, "content", sim.encodeNormValue(2.0f));
|
||||
reader.close();
|
||||
assertFalse("failed to remove first generation norms file on writing second generation",
|
||||
dir.fileExists("_0_1.s0"));
|
||||
|
@ -966,13 +971,13 @@ public class TestIndexReader extends LuceneTestCase
|
|||
|
||||
dir.setMaxSizeInBytes(thisDiskFree);
|
||||
dir.setRandomIOExceptionRate(rate);
|
||||
|
||||
Similarity sim = new DefaultSimilarity().get("content");
|
||||
try {
|
||||
if (0 == x) {
|
||||
int docId = 12;
|
||||
for(int i=0;i<13;i++) {
|
||||
reader.deleteDocument(docId);
|
||||
reader.setNorm(docId, "content", Similarity.getDefault().encodeNormValue(2.0f));
|
||||
reader.setNorm(docId, "content", sim.encodeNormValue(2.0f));
|
||||
docId += 12;
|
||||
}
|
||||
}
|
||||
|
@ -1130,8 +1135,9 @@ public class TestIndexReader extends LuceneTestCase
|
|||
}
|
||||
|
||||
reader = IndexReader.open(dir, false);
|
||||
Similarity sim = new DefaultSimilarity().get("content");
|
||||
try {
|
||||
reader.setNorm(1, "content", Similarity.getDefault().encodeNormValue(2.0f));
|
||||
reader.setNorm(1, "content", sim.encodeNormValue(2.0f));
|
||||
fail("did not hit exception when calling setNorm on an invalid doc number");
|
||||
} catch (ArrayIndexOutOfBoundsException e) {
|
||||
// expected
|
||||
|
|
|
@ -18,6 +18,7 @@ package org.apache.lucene.index;
|
|||
*/
|
||||
|
||||
import org.apache.lucene.index.SegmentReader.Norm;
|
||||
import org.apache.lucene.search.DefaultSimilarity;
|
||||
import org.apache.lucene.search.Similarity;
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.document.Document;
|
||||
|
@ -272,13 +273,14 @@ public class TestIndexReaderClone extends LuceneTestCase {
|
|||
* @throws Exception
|
||||
*/
|
||||
private void performDefaultTests(IndexReader r1) throws Exception {
|
||||
float norm1 = Similarity.getDefault().decodeNormValue(MultiNorms.norms(r1, "field1")[4]);
|
||||
Similarity sim = new DefaultSimilarity().get("field1");
|
||||
float norm1 = sim.decodeNormValue(MultiNorms.norms(r1, "field1")[4]);
|
||||
|
||||
IndexReader pr1Clone = (IndexReader) r1.clone();
|
||||
pr1Clone.deleteDocument(10);
|
||||
pr1Clone.setNorm(4, "field1", Similarity.getDefault().encodeNormValue(0.5f));
|
||||
assertTrue(Similarity.getDefault().decodeNormValue(MultiNorms.norms(r1, "field1")[4]) == norm1);
|
||||
assertTrue(Similarity.getDefault().decodeNormValue(MultiNorms.norms(pr1Clone, "field1")[4]) != norm1);
|
||||
pr1Clone.setNorm(4, "field1", sim.encodeNormValue(0.5f));
|
||||
assertTrue(sim.decodeNormValue(MultiNorms.norms(r1, "field1")[4]) == norm1);
|
||||
assertTrue(sim.decodeNormValue(MultiNorms.norms(pr1Clone, "field1")[4]) != norm1);
|
||||
|
||||
final Bits delDocs = MultiFields.getDeletedDocs(r1);
|
||||
assertTrue(delDocs == null || !delDocs.get(10));
|
||||
|
@ -327,7 +329,8 @@ public class TestIndexReaderClone extends LuceneTestCase {
|
|||
TestIndexReaderReopen.createIndex(random, dir1, false);
|
||||
SegmentReader origSegmentReader = getOnlySegmentReader(IndexReader.open(dir1, false));
|
||||
origSegmentReader.deleteDocument(1);
|
||||
origSegmentReader.setNorm(4, "field1", Similarity.getDefault().encodeNormValue(0.5f));
|
||||
Similarity sim = new DefaultSimilarity().get("field1");
|
||||
origSegmentReader.setNorm(4, "field1", sim.encodeNormValue(0.5f));
|
||||
|
||||
SegmentReader clonedSegmentReader = (SegmentReader) origSegmentReader
|
||||
.clone();
|
||||
|
@ -426,8 +429,9 @@ public class TestIndexReaderClone extends LuceneTestCase {
|
|||
final Directory dir1 = newDirectory();
|
||||
TestIndexReaderReopen.createIndex(random, dir1, false);
|
||||
IndexReader orig = IndexReader.open(dir1, false);
|
||||
orig.setNorm(1, "field1", Similarity.getDefault().encodeNormValue(17.0f));
|
||||
final byte encoded = Similarity.getDefault().encodeNormValue(17.0f);
|
||||
Similarity sim = new DefaultSimilarity().get("field1");
|
||||
orig.setNorm(1, "field1", sim.encodeNormValue(17.0f));
|
||||
final byte encoded = sim.encodeNormValue(17.0f);
|
||||
assertEquals(encoded, MultiNorms.norms(orig, "field1")[1]);
|
||||
|
||||
// the cloned segmentreader should have 2 references, 1 to itself, and 1 to
|
||||
|
|
|
@ -32,6 +32,7 @@ import org.apache.lucene.index.IndexWriterConfig.OpenMode;
|
|||
import org.apache.lucene.index.SegmentReader.Norm;
|
||||
import org.apache.lucene.search.DefaultSimilarity;
|
||||
import org.apache.lucene.search.Similarity;
|
||||
import org.apache.lucene.search.SimilarityProvider;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
|
||||
|
@ -50,7 +51,7 @@ public class TestIndexReaderCloneNorms extends LuceneTestCase {
|
|||
|
||||
private static final int NUM_FIELDS = 10;
|
||||
|
||||
private Similarity similarityOne;
|
||||
private SimilarityProvider similarityOne;
|
||||
|
||||
private Analyzer anlzr;
|
||||
|
||||
|
@ -203,19 +204,20 @@ public class TestIndexReaderCloneNorms extends LuceneTestCase {
|
|||
IndexReader reader4C = (IndexReader) reader3C.clone();
|
||||
SegmentReader segmentReader4C = getOnlySegmentReader(reader4C);
|
||||
assertEquals(4, reader3CCNorm.bytesRef().get());
|
||||
reader4C.setNorm(5, "field1", Similarity.getDefault().encodeNormValue(0.33f));
|
||||
Similarity sim = new DefaultSimilarity().get("field1");
|
||||
reader4C.setNorm(5, "field1", sim.encodeNormValue(0.33f));
|
||||
|
||||
// generate a cannot update exception in reader1
|
||||
try {
|
||||
reader3C.setNorm(1, "field1", Similarity.getDefault().encodeNormValue(0.99f));
|
||||
reader3C.setNorm(1, "field1", sim.encodeNormValue(0.99f));
|
||||
fail("did not hit expected exception");
|
||||
} catch (Exception ex) {
|
||||
// expected
|
||||
}
|
||||
|
||||
// norm values should be different
|
||||
assertTrue(Similarity.getDefault().decodeNormValue(segmentReader3C.norms("field1")[5])
|
||||
!= Similarity.getDefault().decodeNormValue(segmentReader4C.norms("field1")[5]));
|
||||
assertTrue(sim.decodeNormValue(segmentReader3C.norms("field1")[5])
|
||||
!= sim.decodeNormValue(segmentReader4C.norms("field1")[5]));
|
||||
Norm reader4CCNorm = segmentReader4C.norms.get("field1");
|
||||
assertEquals(3, reader3CCNorm.bytesRef().get());
|
||||
assertEquals(1, reader4CCNorm.bytesRef().get());
|
||||
|
@ -223,7 +225,7 @@ public class TestIndexReaderCloneNorms extends LuceneTestCase {
|
|||
IndexReader reader5C = (IndexReader) reader4C.clone();
|
||||
SegmentReader segmentReader5C = getOnlySegmentReader(reader5C);
|
||||
Norm reader5CCNorm = segmentReader5C.norms.get("field1");
|
||||
reader5C.setNorm(5, "field1", Similarity.getDefault().encodeNormValue(0.7f));
|
||||
reader5C.setNorm(5, "field1", sim.encodeNormValue(0.7f));
|
||||
assertEquals(1, reader5CCNorm.bytesRef().get());
|
||||
|
||||
reader5C.close();
|
||||
|
@ -237,7 +239,7 @@ public class TestIndexReaderCloneNorms extends LuceneTestCase {
|
|||
private void createIndex(Random random, Directory dir) throws IOException {
|
||||
IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(
|
||||
TEST_VERSION_CURRENT, anlzr).setOpenMode(OpenMode.CREATE)
|
||||
.setMaxBufferedDocs(5).setSimilarity(similarityOne));
|
||||
.setMaxBufferedDocs(5).setSimilarityProvider(similarityOne));
|
||||
LogMergePolicy lmp = (LogMergePolicy) iw.getConfig().getMergePolicy();
|
||||
lmp.setMergeFactor(3);
|
||||
lmp.setUseCompoundFile(true);
|
||||
|
@ -256,8 +258,9 @@ public class TestIndexReaderCloneNorms extends LuceneTestCase {
|
|||
// System.out.println(" and: for "+k+" from "+newNorm+" to "+origNorm);
|
||||
modifiedNorms.set(i, Float.valueOf(newNorm));
|
||||
modifiedNorms.set(k, Float.valueOf(origNorm));
|
||||
ir.setNorm(i, "f" + 1, Similarity.getDefault().encodeNormValue(newNorm));
|
||||
ir.setNorm(k, "f" + 1, Similarity.getDefault().encodeNormValue(origNorm));
|
||||
Similarity sim = new DefaultSimilarity().get("f" + 1);
|
||||
ir.setNorm(i, "f" + 1, sim.encodeNormValue(newNorm));
|
||||
ir.setNorm(k, "f" + 1, sim.encodeNormValue(origNorm));
|
||||
// System.out.println("setNorm i: "+i);
|
||||
// break;
|
||||
}
|
||||
|
@ -277,7 +280,8 @@ public class TestIndexReaderCloneNorms extends LuceneTestCase {
|
|||
assertEquals("number of norms mismatches", numDocNorms, b.length);
|
||||
ArrayList<Float> storedNorms = (i == 1 ? modifiedNorms : norms);
|
||||
for (int j = 0; j < b.length; j++) {
|
||||
float norm = Similarity.getDefault().decodeNormValue(b[j]);
|
||||
Similarity sim = new DefaultSimilarity().get(field);
|
||||
float norm = sim.decodeNormValue(b[j]);
|
||||
float norm1 = storedNorms.get(j).floatValue();
|
||||
assertEquals("stored norm value of " + field + " for doc " + j + " is "
|
||||
+ norm + " - a mismatch!", norm, norm1, 0.000001);
|
||||
|
@ -289,7 +293,7 @@ public class TestIndexReaderCloneNorms extends LuceneTestCase {
|
|||
throws IOException {
|
||||
IndexWriterConfig conf = newIndexWriterConfig(
|
||||
TEST_VERSION_CURRENT, anlzr).setOpenMode(OpenMode.APPEND)
|
||||
.setMaxBufferedDocs(5).setSimilarity(similarityOne);
|
||||
.setMaxBufferedDocs(5).setSimilarityProvider(similarityOne);
|
||||
LogMergePolicy lmp = (LogMergePolicy) conf.getMergePolicy();
|
||||
lmp.setMergeFactor(3);
|
||||
lmp.setUseCompoundFile(compound);
|
||||
|
@ -303,7 +307,7 @@ public class TestIndexReaderCloneNorms extends LuceneTestCase {
|
|||
// create the next document
|
||||
private Document newDoc() {
|
||||
Document d = new Document();
|
||||
float boost = nextNorm();
|
||||
float boost = nextNorm("anyfield"); // in this test the same similarity is used for all fields so it does not matter what field is passed
|
||||
for (int i = 0; i < 10; i++) {
|
||||
Field f = newField("f" + i, "v" + i, Store.NO, Index.NOT_ANALYZED);
|
||||
f.setBoost(boost);
|
||||
|
@ -313,11 +317,12 @@ public class TestIndexReaderCloneNorms extends LuceneTestCase {
|
|||
}
|
||||
|
||||
// return unique norm values that are unchanged by encoding/decoding
|
||||
private float nextNorm() {
|
||||
private float nextNorm(String fname) {
|
||||
float norm = lastNorm + normDelta;
|
||||
Similarity sim = new DefaultSimilarity().get(fname);
|
||||
do {
|
||||
float norm1 = Similarity.getDefault().decodeNormValue(
|
||||
Similarity.getDefault().encodeNormValue(norm));
|
||||
float norm1 = sim.decodeNormValue(
|
||||
sim.encodeNormValue(norm));
|
||||
if (norm1 > lastNorm) {
|
||||
// System.out.println(norm1+" > "+lastNorm);
|
||||
norm = norm1;
|
||||
|
|
|
@ -35,9 +35,11 @@ import org.apache.lucene.document.Field.Index;
|
|||
import org.apache.lucene.document.Field.Store;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
|
||||
import org.apache.lucene.search.DefaultSimilarity;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.ScoreDoc;
|
||||
import org.apache.lucene.search.Similarity;
|
||||
import org.apache.lucene.search.SimilarityProvider;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.store.AlreadyClosedException;
|
||||
import org.apache.lucene.store.Directory;
|
||||
|
@ -615,8 +617,9 @@ public class TestIndexReaderReopen extends LuceneTestCase {
|
|||
|
||||
IndexReader reader2 = reader1.reopen();
|
||||
modifier = IndexReader.open(dir1, false);
|
||||
modifier.setNorm(1, "field1", Similarity.getDefault().encodeNormValue(50f));
|
||||
modifier.setNorm(1, "field2", Similarity.getDefault().encodeNormValue(50f));
|
||||
SimilarityProvider sim = new DefaultSimilarity();
|
||||
modifier.setNorm(1, "field1", sim.get("field1").encodeNormValue(50f));
|
||||
modifier.setNorm(1, "field2", sim.get("field2").encodeNormValue(50f));
|
||||
modifier.close();
|
||||
|
||||
IndexReader reader3 = reader2.reopen();
|
||||
|
@ -709,7 +712,8 @@ public class TestIndexReaderReopen extends LuceneTestCase {
|
|||
protected void modifyIndex(int i) throws IOException {
|
||||
if (i % 3 == 0) {
|
||||
IndexReader modifier = IndexReader.open(dir, false);
|
||||
modifier.setNorm(i, "field1", Similarity.getDefault().encodeNormValue(50f));
|
||||
Similarity sim = new DefaultSimilarity().get("field1");
|
||||
modifier.setNorm(i, "field1", sim.encodeNormValue(50f));
|
||||
modifier.close();
|
||||
} else if (i % 3 == 1) {
|
||||
IndexReader modifier = IndexReader.open(dir, false);
|
||||
|
@ -989,9 +993,10 @@ public class TestIndexReaderReopen extends LuceneTestCase {
|
|||
}
|
||||
case 1: {
|
||||
IndexReader reader = IndexReader.open(dir, false);
|
||||
reader.setNorm(4, "field1", Similarity.getDefault().encodeNormValue(123f));
|
||||
reader.setNorm(44, "field2", Similarity.getDefault().encodeNormValue(222f));
|
||||
reader.setNorm(44, "field4", Similarity.getDefault().encodeNormValue(22f));
|
||||
SimilarityProvider sim = new DefaultSimilarity();
|
||||
reader.setNorm(4, "field1", sim.get("field1").encodeNormValue(123f));
|
||||
reader.setNorm(44, "field2", sim.get("field2").encodeNormValue(222f));
|
||||
reader.setNorm(44, "field4", sim.get("field4").encodeNormValue(22f));
|
||||
reader.close();
|
||||
break;
|
||||
}
|
||||
|
@ -1012,8 +1017,9 @@ public class TestIndexReaderReopen extends LuceneTestCase {
|
|||
}
|
||||
case 4: {
|
||||
IndexReader reader = IndexReader.open(dir, false);
|
||||
reader.setNorm(5, "field1", Similarity.getDefault().encodeNormValue(123f));
|
||||
reader.setNorm(55, "field2", Similarity.getDefault().encodeNormValue(222f));
|
||||
SimilarityProvider sim = new DefaultSimilarity();
|
||||
reader.setNorm(5, "field1", sim.get("field1").encodeNormValue(123f));
|
||||
reader.setNorm(55, "field2", sim.get("field2").encodeNormValue(222f));
|
||||
reader.close();
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -27,7 +27,7 @@ import org.apache.lucene.analysis.MockAnalyzer;
|
|||
import org.apache.lucene.index.DocumentsWriter.IndexingChain;
|
||||
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
|
||||
import org.apache.lucene.search.DefaultSimilarity;
|
||||
import org.apache.lucene.search.Similarity;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.junit.Test;
|
||||
|
||||
|
@ -55,7 +55,8 @@ public class TestIndexWriterConfig extends LuceneTestCase {
|
|||
assertEquals(KeepOnlyLastCommitDeletionPolicy.class, conf.getIndexDeletionPolicy().getClass());
|
||||
assertEquals(ConcurrentMergeScheduler.class, conf.getMergeScheduler().getClass());
|
||||
assertEquals(OpenMode.CREATE_OR_APPEND, conf.getOpenMode());
|
||||
assertTrue(Similarity.getDefault() == conf.getSimilarity());
|
||||
// we don't need to assert this, it should be unspecified
|
||||
assertTrue(IndexSearcher.getDefaultSimilarityProvider() == conf.getSimilarityProvider());
|
||||
assertEquals(IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL, conf.getTermIndexInterval());
|
||||
assertEquals(IndexWriterConfig.getDefaultWriteLockTimeout(), conf.getWriteLockTimeout());
|
||||
assertEquals(IndexWriterConfig.WRITE_LOCK_TIMEOUT, IndexWriterConfig.getDefaultWriteLockTimeout());
|
||||
|
@ -77,7 +78,7 @@ public class TestIndexWriterConfig extends LuceneTestCase {
|
|||
getters.add("getMaxFieldLength");
|
||||
getters.add("getMergeScheduler");
|
||||
getters.add("getOpenMode");
|
||||
getters.add("getSimilarity");
|
||||
getters.add("getSimilarityProvider");
|
||||
getters.add("getTermIndexInterval");
|
||||
getters.add("getWriteLockTimeout");
|
||||
getters.add("getDefaultWriteLockTimeout");
|
||||
|
@ -173,12 +174,13 @@ public class TestIndexWriterConfig extends LuceneTestCase {
|
|||
conf.setMergeScheduler(null);
|
||||
assertEquals(ConcurrentMergeScheduler.class, conf.getMergeScheduler().getClass());
|
||||
|
||||
// Test Similarity
|
||||
assertTrue(Similarity.getDefault() == conf.getSimilarity());
|
||||
conf.setSimilarity(new MySimilarity());
|
||||
assertEquals(MySimilarity.class, conf.getSimilarity().getClass());
|
||||
conf.setSimilarity(null);
|
||||
assertTrue(Similarity.getDefault() == conf.getSimilarity());
|
||||
// Test Similarity:
|
||||
// we shouldn't assert what the default is, just that it's not null.
|
||||
assertTrue(IndexSearcher.getDefaultSimilarityProvider() == conf.getSimilarityProvider());
|
||||
conf.setSimilarityProvider(new MySimilarity());
|
||||
assertEquals(MySimilarity.class, conf.getSimilarityProvider().getClass());
|
||||
conf.setSimilarityProvider(null);
|
||||
assertTrue(IndexSearcher.getDefaultSimilarityProvider() == conf.getSimilarityProvider());
|
||||
|
||||
// Test IndexingChain
|
||||
assertTrue(DocumentsWriter.defaultIndexingChain == conf.getIndexingChain());
|
||||
|
|
|
@ -46,7 +46,7 @@ public class TestMaxTermFrequency extends LuceneTestCase {
|
|||
dir = newDirectory();
|
||||
IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT,
|
||||
new MockAnalyzer(MockTokenizer.SIMPLE, true));
|
||||
config.setSimilarity(new TestSimilarity());
|
||||
config.setSimilarityProvider(new TestSimilarity());
|
||||
RandomIndexWriter writer = new RandomIndexWriter(random, dir, config);
|
||||
Document doc = new Document();
|
||||
Field foo = newField("foo", "", Field.Store.NO, Field.Index.ANALYZED);
|
||||
|
|
|
@ -30,6 +30,7 @@ import org.apache.lucene.document.Field.Store;
|
|||
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
|
||||
import org.apache.lucene.search.DefaultSimilarity;
|
||||
import org.apache.lucene.search.Similarity;
|
||||
import org.apache.lucene.search.SimilarityProvider;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
|
||||
|
@ -49,7 +50,7 @@ public class TestNorms extends LuceneTestCase {
|
|||
|
||||
private static final int NUM_FIELDS = 10;
|
||||
|
||||
private Similarity similarityOne;
|
||||
private SimilarityProvider similarityOne;
|
||||
private Analyzer anlzr;
|
||||
private int numDocNorms;
|
||||
private ArrayList<Float> norms;
|
||||
|
@ -151,7 +152,7 @@ public class TestNorms extends LuceneTestCase {
|
|||
private void createIndex(Random random, Directory dir) throws IOException {
|
||||
IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(
|
||||
TEST_VERSION_CURRENT, anlzr).setOpenMode(OpenMode.CREATE)
|
||||
.setMaxBufferedDocs(5).setSimilarity(similarityOne));
|
||||
.setMaxBufferedDocs(5).setSimilarityProvider(similarityOne));
|
||||
LogMergePolicy lmp = (LogMergePolicy) iw.getConfig().getMergePolicy();
|
||||
lmp.setMergeFactor(3);
|
||||
lmp.setUseCompoundFile(true);
|
||||
|
@ -169,8 +170,9 @@ public class TestNorms extends LuceneTestCase {
|
|||
//System.out.println(" and: for "+k+" from "+newNorm+" to "+origNorm);
|
||||
modifiedNorms.set(i, Float.valueOf(newNorm));
|
||||
modifiedNorms.set(k, Float.valueOf(origNorm));
|
||||
ir.setNorm(i, "f"+1, Similarity.getDefault().encodeNormValue(newNorm));
|
||||
ir.setNorm(k, "f"+1, Similarity.getDefault().encodeNormValue(origNorm));
|
||||
Similarity sim = new DefaultSimilarity().get("f"+1);
|
||||
ir.setNorm(i, "f"+1, sim.encodeNormValue(newNorm));
|
||||
ir.setNorm(k, "f"+1, sim.encodeNormValue(origNorm));
|
||||
}
|
||||
ir.close();
|
||||
}
|
||||
|
@ -184,7 +186,7 @@ public class TestNorms extends LuceneTestCase {
|
|||
assertEquals("number of norms mismatches",numDocNorms,b.length);
|
||||
ArrayList<Float> storedNorms = (i==1 ? modifiedNorms : norms);
|
||||
for (int j = 0; j < b.length; j++) {
|
||||
float norm = similarityOne.decodeNormValue(b[j]);
|
||||
float norm = similarityOne.get(field).decodeNormValue(b[j]);
|
||||
float norm1 = storedNorms.get(j).floatValue();
|
||||
assertEquals("stored norm value of "+field+" for doc "+j+" is "+norm+" - a mismatch!", norm, norm1, 0.000001);
|
||||
}
|
||||
|
@ -195,7 +197,7 @@ public class TestNorms extends LuceneTestCase {
|
|||
private void addDocs(Random random, Directory dir, int ndocs, boolean compound) throws IOException {
|
||||
IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(
|
||||
TEST_VERSION_CURRENT, anlzr).setOpenMode(OpenMode.APPEND)
|
||||
.setMaxBufferedDocs(5).setSimilarity(similarityOne));
|
||||
.setMaxBufferedDocs(5).setSimilarityProvider(similarityOne));
|
||||
LogMergePolicy lmp = (LogMergePolicy) iw.getConfig().getMergePolicy();
|
||||
lmp.setMergeFactor(3);
|
||||
lmp.setUseCompoundFile(compound);
|
||||
|
@ -208,7 +210,7 @@ public class TestNorms extends LuceneTestCase {
|
|||
// create the next document
|
||||
private Document newDoc() {
|
||||
Document d = new Document();
|
||||
float boost = nextNorm();
|
||||
float boost = nextNorm("anyfield"); // in this test the same similarity is used for all fields so it does not matter what field is passed
|
||||
for (int i = 0; i < 10; i++) {
|
||||
Field f = newField("f"+i,"v"+i,Store.NO,Index.NOT_ANALYZED);
|
||||
f.setBoost(boost);
|
||||
|
@ -218,10 +220,11 @@ public class TestNorms extends LuceneTestCase {
|
|||
}
|
||||
|
||||
// return unique norm values that are unchanged by encoding/decoding
|
||||
private float nextNorm() {
|
||||
private float nextNorm(String fname) {
|
||||
float norm = lastNorm + normDelta;
|
||||
Similarity similarity = similarityOne.get(fname);
|
||||
do {
|
||||
float norm1 = similarityOne.decodeNormValue(similarityOne.encodeNormValue(norm));
|
||||
float norm1 = similarity.decodeNormValue(similarity.encodeNormValue(norm));
|
||||
if (norm1 > lastNorm) {
|
||||
//System.out.println(norm1+" > "+lastNorm);
|
||||
norm = norm1;
|
||||
|
@ -258,7 +261,7 @@ public class TestNorms extends LuceneTestCase {
|
|||
public void testCustomEncoder() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer());
|
||||
config.setSimilarity(new CustomNormEncodingSimilarity());
|
||||
config.setSimilarityProvider(new CustomNormEncodingSimilarity());
|
||||
RandomIndexWriter writer = new RandomIndexWriter(random, dir, config);
|
||||
Document doc = new Document();
|
||||
Field foo = newField("foo", "", Field.Store.NO, Field.Index.ANALYZED);
|
||||
|
|
|
@ -35,13 +35,11 @@ import org.apache.lucene.search.Explanation.IDFExplanation;
|
|||
|
||||
public class TestOmitTf extends LuceneTestCase {
|
||||
|
||||
public static class SimpleSimilarity extends Similarity {
|
||||
public static class SimpleSimilarity extends Similarity implements SimilarityProvider {
|
||||
@Override public float computeNorm(String field, FieldInvertState state) { return state.getBoost(); }
|
||||
@Override public float queryNorm(float sumOfSquaredWeights) { return 1.0f; }
|
||||
@Override public float tf(float freq) { return freq; }
|
||||
@Override public float sloppyFreq(int distance) { return 2.0f; }
|
||||
@Override public float idf(int docFreq, int numDocs) { return 1.0f; }
|
||||
@Override public float coord(int overlap, int maxOverlap) { return 1.0f; }
|
||||
@Override public IDFExplanation idfExplain(Collection<Term> terms, IndexSearcher searcher) throws IOException {
|
||||
return new IDFExplanation() {
|
||||
@Override
|
||||
|
@ -54,6 +52,11 @@ public class TestOmitTf extends LuceneTestCase {
|
|||
}
|
||||
};
|
||||
}
|
||||
public float queryNorm(float sumOfSquaredWeights) { return 1.0f; }
|
||||
public float coord(int overlap, int maxOverlap) { return 1.0f; }
|
||||
public Similarity get(String field) {
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
||||
// Tests whether the DocumentWriter correctly enable the
|
||||
|
@ -251,7 +254,7 @@ public class TestOmitTf extends LuceneTestCase {
|
|||
dir,
|
||||
newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).
|
||||
setMaxBufferedDocs(2).
|
||||
setSimilarity(new SimpleSimilarity()).
|
||||
setSimilarityProvider(new SimpleSimilarity()).
|
||||
setMergePolicy(newLogMergePolicy(2))
|
||||
);
|
||||
|
||||
|
@ -281,7 +284,7 @@ public class TestOmitTf extends LuceneTestCase {
|
|||
* Verify the index
|
||||
*/
|
||||
IndexSearcher searcher = new IndexSearcher(dir, true);
|
||||
searcher.setSimilarity(new SimpleSimilarity());
|
||||
searcher.setSimilarityProvider(new SimpleSimilarity());
|
||||
|
||||
Term a = new Term("noTf", term);
|
||||
Term b = new Term("tf", term);
|
||||
|
|
|
@ -147,7 +147,8 @@ public class TestParallelReader extends LuceneTestCase {
|
|||
|
||||
assertTrue(pr.isCurrent());
|
||||
IndexReader modifier = IndexReader.open(dir1, false);
|
||||
modifier.setNorm(0, "f1", Similarity.getDefault().encodeNormValue(100f));
|
||||
SimilarityProvider sim = new DefaultSimilarity();
|
||||
modifier.setNorm(0, "f1", sim.get("f1").encodeNormValue(100f));
|
||||
modifier.close();
|
||||
|
||||
// one of the two IndexReaders which ParallelReader is using
|
||||
|
@ -155,7 +156,7 @@ public class TestParallelReader extends LuceneTestCase {
|
|||
assertFalse(pr.isCurrent());
|
||||
|
||||
modifier = IndexReader.open(dir2, false);
|
||||
modifier.setNorm(0, "f3", Similarity.getDefault().encodeNormValue(100f));
|
||||
modifier.setNorm(0, "f3", sim.get("f3").encodeNormValue(100f));
|
||||
modifier.close();
|
||||
|
||||
// now both are not current anymore
|
||||
|
|
|
@ -242,11 +242,6 @@ final class JustCompileSearch {
|
|||
|
||||
static final class JustCompileSimilarity extends Similarity {
|
||||
|
||||
@Override
|
||||
public float coord(int overlap, int maxOverlap) {
|
||||
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
||||
}
|
||||
|
||||
@Override
|
||||
public float idf(int docFreq, int numDocs) {
|
||||
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
||||
|
@ -257,11 +252,6 @@ final class JustCompileSearch {
|
|||
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
||||
}
|
||||
|
||||
@Override
|
||||
public float queryNorm(float sumOfSquaredWeights) {
|
||||
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
||||
}
|
||||
|
||||
@Override
|
||||
public float sloppyFreq(int distance) {
|
||||
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
||||
|
@ -270,8 +260,22 @@ final class JustCompileSearch {
|
|||
@Override
|
||||
public float tf(float freq) {
|
||||
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Compile-only stub implementation of {@link SimilarityProvider}: every
 * method throws {@link UnsupportedOperationException} with UNSUPPORTED_MSG,
 * so the class is not usable for real scoring.
 */
static final class JustCompileSimilarityProvider implements SimilarityProvider {
|
||||
|
||||
/** Not implemented; always throws {@link UnsupportedOperationException}. */
public float queryNorm(float sumOfSquaredWeights) {
|
||||
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
||||
}
|
||||
|
||||
/** Not implemented; always throws {@link UnsupportedOperationException}. */
public float coord(int overlap, int maxOverlap) {
|
||||
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
||||
}
|
||||
|
||||
/** Not implemented; always throws {@link UnsupportedOperationException}. */
public Similarity get(String field) {
|
||||
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
||||
}
|
||||
}
|
||||
|
||||
static final class JustCompileSpanFilter extends SpanFilter {
|
||||
|
|
|
@ -158,7 +158,7 @@ public class QueryUtils {
|
|||
0 < edge ? r : IndexReader.open(makeEmptyIndex(random, 0), true))
|
||||
};
|
||||
IndexSearcher out = new IndexSearcher(new MultiReader(readers));
|
||||
out.setSimilarity(s.getSimilarity());
|
||||
out.setSimilarityProvider(s.getSimilarityProvider());
|
||||
return out;
|
||||
}
|
||||
|
||||
|
|
|
@ -208,9 +208,9 @@ public class TestBoolean2 extends LuceneTestCase {
|
|||
public void testQueries10() throws Exception {
|
||||
String queryText = "+w3 +xx +w2 zz";
|
||||
int[] expDocNrs = {2, 3};
|
||||
Similarity oldSimilarity = searcher.getSimilarity();
|
||||
SimilarityProvider oldSimilarity = searcher.getSimilarityProvider();
|
||||
try {
|
||||
searcher.setSimilarity(new DefaultSimilarity(){
|
||||
searcher.setSimilarityProvider(new DefaultSimilarity(){
|
||||
@Override
|
||||
public float coord(int overlap, int maxOverlap) {
|
||||
return overlap / ((float)maxOverlap - 1);
|
||||
|
@ -218,7 +218,7 @@ public class TestBoolean2 extends LuceneTestCase {
|
|||
});
|
||||
queriesTest(queryText, expDocNrs);
|
||||
} finally {
|
||||
searcher.setSimilarity(oldSimilarity);
|
||||
searcher.setSimilarityProvider(oldSimilarity);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -34,7 +34,7 @@ public class TestComplexExplanations extends TestExplanations {
|
|||
@Override
|
||||
public void setUp() throws Exception {
|
||||
super.setUp();
|
||||
searcher.setSimilarity(createQnorm1Similarity());
|
||||
searcher.setSimilarityProvider(createQnorm1Similarity());
|
||||
}
|
||||
|
||||
// must be static for weight serialization tests
|
||||
|
|
|
@ -97,7 +97,7 @@ public class TestConstantScoreQuery extends LuceneTestCase {
|
|||
searcher = new IndexSearcher(reader);
|
||||
|
||||
// set a similarity that does not normalize our boost away
|
||||
searcher.setSimilarity(new DefaultSimilarity() {
|
||||
searcher.setSimilarityProvider(new DefaultSimilarity() {
|
||||
@Override
|
||||
public float queryNorm(float sumOfSquaredWeights) {
|
||||
return 1.0f;
|
||||
|
|
|
@ -73,7 +73,7 @@ public class TestDisjunctionMaxQuery extends LuceneTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
public Similarity sim = new TestSimilarity();
|
||||
public SimilarityProvider sim = new TestSimilarity();
|
||||
public Directory index;
|
||||
public IndexReader r;
|
||||
public IndexSearcher s;
|
||||
|
@ -85,7 +85,7 @@ public class TestDisjunctionMaxQuery extends LuceneTestCase {
|
|||
index = newDirectory();
|
||||
RandomIndexWriter writer = new RandomIndexWriter(random, index,
|
||||
newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer())
|
||||
.setSimilarity(sim));
|
||||
.setSimilarityProvider(sim));
|
||||
|
||||
// hed is the most important field, dek is secondary
|
||||
|
||||
|
@ -150,7 +150,7 @@ public class TestDisjunctionMaxQuery extends LuceneTestCase {
|
|||
r = new SlowMultiReaderWrapper(writer.getReader());
|
||||
writer.close();
|
||||
s = new IndexSearcher(r);
|
||||
s.setSimilarity(sim);
|
||||
s.setSimilarityProvider(sim);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -69,7 +69,7 @@ public class TestMatchAllDocsQuery extends LuceneTestCase {
|
|||
assertEquals("one", ir.document(hits[2].doc).get("key"));
|
||||
|
||||
// change norm & retest
|
||||
ir.setNorm(0, "key", Similarity.getDefault().encodeNormValue(400f));
|
||||
ir.setNorm(0, "key", is.getSimilarityProvider().get("key").encodeNormValue(400f));
|
||||
normsQuery = new MatchAllDocsQuery("key");
|
||||
hits = is.search(normsQuery, null, 1000).scoreDocs;
|
||||
assertEquals(3, hits.length);
|
||||
|
|
|
@ -295,7 +295,7 @@ public class TestMultiPhraseQuery extends LuceneTestCase {
|
|||
|
||||
IndexReader reader = writer.getReader();
|
||||
IndexSearcher searcher = new IndexSearcher(reader);
|
||||
searcher.setSimilarity(new DefaultSimilarity() {
|
||||
searcher.setSimilarityProvider(new DefaultSimilarity() {
|
||||
|
||||
@Override
|
||||
public IDFExplanation idfExplain(Collection<Term> terms,
|
||||
|
|
|
@ -51,10 +51,11 @@ public class TestSetNorm extends LuceneTestCase {
|
|||
|
||||
// reset the boost of each instance of this document
|
||||
IndexReader reader = IndexReader.open(store, false);
|
||||
reader.setNorm(0, "field", Similarity.getDefault().encodeNormValue(1.0f));
|
||||
reader.setNorm(1, "field", Similarity.getDefault().encodeNormValue(2.0f));
|
||||
reader.setNorm(2, "field", Similarity.getDefault().encodeNormValue(4.0f));
|
||||
reader.setNorm(3, "field", Similarity.getDefault().encodeNormValue(16.0f));
|
||||
Similarity similarity = new DefaultSimilarity().get("field");
|
||||
reader.setNorm(0, "field", similarity.encodeNormValue(1.0f));
|
||||
reader.setNorm(1, "field", similarity.encodeNormValue(2.0f));
|
||||
reader.setNorm(2, "field", similarity.encodeNormValue(4.0f));
|
||||
reader.setNorm(3, "field", similarity.encodeNormValue(16.0f));
|
||||
reader.close();
|
||||
|
||||
// check that searches are ordered by this boost
|
||||
|
|
|
@ -39,13 +39,11 @@ import org.apache.lucene.search.Explanation.IDFExplanation;
|
|||
*/
|
||||
public class TestSimilarity extends LuceneTestCase {
|
||||
|
||||
public static class SimpleSimilarity extends Similarity {
|
||||
public static class SimpleSimilarity extends Similarity implements SimilarityProvider {
|
||||
@Override public float computeNorm(String field, FieldInvertState state) { return state.getBoost(); }
|
||||
@Override public float queryNorm(float sumOfSquaredWeights) { return 1.0f; }
|
||||
@Override public float tf(float freq) { return freq; }
|
||||
@Override public float sloppyFreq(int distance) { return 2.0f; }
|
||||
@Override public float idf(int docFreq, int numDocs) { return 1.0f; }
|
||||
@Override public float coord(int overlap, int maxOverlap) { return 1.0f; }
|
||||
@Override public IDFExplanation idfExplain(Collection<Term> terms, IndexSearcher searcher) throws IOException {
|
||||
return new IDFExplanation() {
|
||||
@Override
|
||||
|
@ -58,13 +56,18 @@ public class TestSimilarity extends LuceneTestCase {
|
|||
}
|
||||
};
|
||||
}
|
||||
public float queryNorm(float sumOfSquaredWeights) { return 1.0f; }
|
||||
public float coord(int overlap, int maxOverlap) { return 1.0f; }
|
||||
public Similarity get(String field) {
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
||||
public void testSimilarity() throws Exception {
|
||||
Directory store = newDirectory();
|
||||
RandomIndexWriter writer = new RandomIndexWriter(random, store,
|
||||
newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer())
|
||||
.setSimilarity(new SimpleSimilarity()));
|
||||
.setSimilarityProvider(new SimpleSimilarity()));
|
||||
|
||||
Document d1 = new Document();
|
||||
d1.add(newField("field", "a c", Field.Store.YES, Field.Index.ANALYZED));
|
||||
|
@ -78,7 +81,7 @@ public class TestSimilarity extends LuceneTestCase {
|
|||
writer.close();
|
||||
|
||||
IndexSearcher searcher = new IndexSearcher(reader);
|
||||
searcher.setSimilarity(new SimpleSimilarity());
|
||||
searcher.setSimilarityProvider(new SimpleSimilarity());
|
||||
|
||||
Term a = new Term("field", "a");
|
||||
Term b = new Term("field", "b");
|
||||
|
|
|
@ -0,0 +1,151 @@
|
|||
package org.apache.lucene.search;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.index.FieldInvertState;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
import org.apache.lucene.index.MultiNorms;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
|
||||
public class TestSimilarityProvider extends LuceneTestCase {
|
||||
private Directory directory;
|
||||
private IndexReader reader;
|
||||
private IndexSearcher searcher;
|
||||
|
||||
@Override
|
||||
public void setUp() throws Exception {
|
||||
super.setUp();
|
||||
directory = newDirectory();
|
||||
SimilarityProvider sim = new ExampleSimilarityProvider();
|
||||
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT,
|
||||
new MockAnalyzer()).setSimilarityProvider(sim);
|
||||
RandomIndexWriter iw = new RandomIndexWriter(random, directory, iwc);
|
||||
Document doc = new Document();
|
||||
Field field = newField("foo", "", Field.Store.NO, Field.Index.ANALYZED);
|
||||
doc.add(field);
|
||||
Field field2 = newField("bar", "", Field.Store.NO, Field.Index.ANALYZED);
|
||||
doc.add(field2);
|
||||
|
||||
field.setValue("quick brown fox");
|
||||
field2.setValue("quick brown fox");
|
||||
iw.addDocument(doc);
|
||||
field.setValue("jumps over lazy brown dog");
|
||||
field2.setValue("jumps over lazy brown dog");
|
||||
iw.addDocument(doc);
|
||||
reader = iw.getReader();
|
||||
iw.close();
|
||||
searcher = new IndexSearcher(reader);
|
||||
searcher.setSimilarityProvider(sim);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void tearDown() throws Exception {
|
||||
searcher.close();
|
||||
reader.close();
|
||||
directory.close();
|
||||
super.tearDown();
|
||||
}
|
||||
|
||||
public void testBasics() throws Exception {
|
||||
// sanity check of norms writer
|
||||
byte fooNorms[] = MultiNorms.norms(reader, "foo");
|
||||
byte barNorms[] = MultiNorms.norms(reader, "bar");
|
||||
for (int i = 0; i < fooNorms.length; i++) {
|
||||
assertFalse(fooNorms[i] == barNorms[i]);
|
||||
}
|
||||
|
||||
// sanity check of searching
|
||||
TopDocs foodocs = searcher.search(new TermQuery(new Term("foo", "brown")), 10);
|
||||
assertTrue(foodocs.totalHits > 0);
|
||||
TopDocs bardocs = searcher.search(new TermQuery(new Term("bar", "brown")), 10);
|
||||
assertTrue(bardocs.totalHits > 0);
|
||||
assertTrue(foodocs.scoreDocs[0].score < bardocs.scoreDocs[0].score);
|
||||
}
|
||||
|
||||
private class ExampleSimilarityProvider implements SimilarityProvider {
|
||||
private Similarity sim1 = new Sim1();
|
||||
private Similarity sim2 = new Sim2();
|
||||
|
||||
public float coord(int overlap, int maxOverlap) {
|
||||
return 1f;
|
||||
}
|
||||
|
||||
public float queryNorm(float sumOfSquaredWeights) {
|
||||
return 1f;
|
||||
}
|
||||
|
||||
public Similarity get(String field) {
|
||||
if (field.equals("foo")) {
|
||||
return sim1;
|
||||
} else {
|
||||
return sim2;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private class Sim1 extends Similarity {
|
||||
@Override
|
||||
public float computeNorm(String field, FieldInvertState state) {
|
||||
return 1f;
|
||||
}
|
||||
|
||||
@Override
|
||||
public float sloppyFreq(int distance) {
|
||||
return 1f;
|
||||
}
|
||||
|
||||
@Override
|
||||
public float tf(float freq) {
|
||||
return 1f;
|
||||
}
|
||||
|
||||
@Override
|
||||
public float idf(int docFreq, int numDocs) {
|
||||
return 1f;
|
||||
}
|
||||
}
|
||||
|
||||
private class Sim2 extends Similarity {
|
||||
@Override
|
||||
public float computeNorm(String field, FieldInvertState state) {
|
||||
return 10f;
|
||||
}
|
||||
|
||||
@Override
|
||||
public float sloppyFreq(int distance) {
|
||||
return 10f;
|
||||
}
|
||||
|
||||
@Override
|
||||
public float tf(float freq) {
|
||||
return 10f;
|
||||
}
|
||||
|
||||
@Override
|
||||
public float idf(int docFreq, int numDocs) {
|
||||
return 10f;
|
||||
}
|
||||
}
|
||||
}
|
|
@ -27,7 +27,7 @@ import org.apache.lucene.document.Field;
|
|||
import org.apache.lucene.util.English;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.Similarity;
|
||||
import org.apache.lucene.search.SimilarityProvider;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.MockDirectoryWrapper;
|
||||
import org.apache.lucene.store.RAMDirectory;
|
||||
|
@ -111,13 +111,13 @@ public class PayloadHelper {
|
|||
* @throws IOException
|
||||
*/
|
||||
// TODO: randomize
|
||||
public IndexSearcher setUp(Random random, Similarity similarity, int numDocs) throws IOException {
|
||||
public IndexSearcher setUp(Random random, SimilarityProvider similarity, int numDocs) throws IOException {
|
||||
Directory directory = new MockDirectoryWrapper(random, new RAMDirectory());
|
||||
PayloadAnalyzer analyzer = new PayloadAnalyzer();
|
||||
|
||||
// TODO randomize this
|
||||
IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(
|
||||
TEST_VERSION_CURRENT, analyzer).setSimilarity(similarity));
|
||||
TEST_VERSION_CURRENT, analyzer).setSimilarityProvider(similarity));
|
||||
// writer.infoStream = System.out;
|
||||
for (int i = 0; i < numDocs; i++) {
|
||||
Document doc = new Document();
|
||||
|
@ -130,7 +130,7 @@ public class PayloadHelper {
|
|||
writer.close();
|
||||
|
||||
IndexSearcher searcher = new IndexSearcher(reader);
|
||||
searcher.setSimilarity(similarity);
|
||||
searcher.setSimilarityProvider(similarity);
|
||||
return searcher;
|
||||
}
|
||||
|
||||
|
|
|
@ -105,7 +105,7 @@ public class TestPayloadNearQuery extends LuceneTestCase {
|
|||
directory = newDirectory();
|
||||
RandomIndexWriter writer = new RandomIndexWriter(random, directory,
|
||||
newIndexWriterConfig(TEST_VERSION_CURRENT, new PayloadAnalyzer())
|
||||
.setSimilarity(similarity));
|
||||
.setSimilarityProvider(similarity));
|
||||
//writer.infoStream = System.out;
|
||||
for (int i = 0; i < 1000; i++) {
|
||||
Document doc = new Document();
|
||||
|
@ -118,7 +118,7 @@ public class TestPayloadNearQuery extends LuceneTestCase {
|
|||
writer.close();
|
||||
|
||||
searcher = new IndexSearcher(reader);
|
||||
searcher.setSimilarity(similarity);
|
||||
searcher.setSimilarityProvider(similarity);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -110,7 +110,7 @@ public class TestPayloadTermQuery extends LuceneTestCase {
|
|||
directory = newDirectory();
|
||||
RandomIndexWriter writer = new RandomIndexWriter(random, directory,
|
||||
newIndexWriterConfig(TEST_VERSION_CURRENT, new PayloadAnalyzer())
|
||||
.setSimilarity(similarity));
|
||||
.setSimilarityProvider(similarity));
|
||||
//writer.infoStream = System.out;
|
||||
for (int i = 0; i < 1000; i++) {
|
||||
Document doc = new Document();
|
||||
|
@ -125,7 +125,7 @@ public class TestPayloadTermQuery extends LuceneTestCase {
|
|||
writer.close();
|
||||
|
||||
searcher = new IndexSearcher(reader);
|
||||
searcher.setSimilarity(similarity);
|
||||
searcher.setSimilarityProvider(similarity);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -220,7 +220,7 @@ public class TestPayloadTermQuery extends LuceneTestCase {
|
|||
new MaxPayloadFunction(), false);
|
||||
|
||||
IndexSearcher theSearcher = new IndexSearcher(directory, true);
|
||||
theSearcher.setSimilarity(new FullSimilarity());
|
||||
theSearcher.setSimilarityProvider(new FullSimilarity());
|
||||
TopDocs hits = searcher.search(query, null, 100);
|
||||
assertTrue("hits is null and it shouldn't be", hits != null);
|
||||
assertTrue("hits Size: " + hits.totalHits + " is not: " + 100, hits.totalHits == 100);
|
||||
|
|
|
@ -39,7 +39,7 @@ import org.apache.lucene.index.Payload;
|
|||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.DefaultSimilarity;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.Similarity;
|
||||
import org.apache.lucene.search.SimilarityProvider;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.search.TopDocs;
|
||||
import org.apache.lucene.search.payloads.PayloadHelper;
|
||||
|
@ -50,7 +50,7 @@ import org.apache.lucene.util.LuceneTestCase;
|
|||
|
||||
public class TestPayloadSpans extends LuceneTestCase {
|
||||
private IndexSearcher searcher;
|
||||
private Similarity similarity = new DefaultSimilarity();
|
||||
private SimilarityProvider similarity = new DefaultSimilarity();
|
||||
protected IndexReader indexReader;
|
||||
private IndexReader closeIndexReader;
|
||||
private Directory directory;
|
||||
|
@ -110,7 +110,7 @@ public class TestPayloadSpans extends LuceneTestCase {
|
|||
|
||||
Directory directory = newDirectory();
|
||||
RandomIndexWriter writer = new RandomIndexWriter(random, directory,
|
||||
newIndexWriterConfig(TEST_VERSION_CURRENT, new PayloadAnalyzer()).setSimilarity(similarity));
|
||||
newIndexWriterConfig(TEST_VERSION_CURRENT, new PayloadAnalyzer()).setSimilarityProvider(similarity));
|
||||
|
||||
Document doc = new Document();
|
||||
doc.add(newField(PayloadHelper.FIELD, "one two three one four three",
|
||||
|
@ -370,7 +370,7 @@ public class TestPayloadSpans extends LuceneTestCase {
|
|||
public void testPayloadSpanUtil() throws Exception {
|
||||
Directory directory = newDirectory();
|
||||
RandomIndexWriter writer = new RandomIndexWriter(random, directory,
|
||||
newIndexWriterConfig(TEST_VERSION_CURRENT, new PayloadAnalyzer()).setSimilarity(similarity));
|
||||
newIndexWriterConfig(TEST_VERSION_CURRENT, new PayloadAnalyzer()).setSimilarityProvider(similarity));
|
||||
|
||||
Document doc = new Document();
|
||||
doc.add(newField(PayloadHelper.FIELD,"xx rr yy mm pp", Field.Store.YES, Field.Index.ANALYZED));
|
||||
|
@ -430,7 +430,7 @@ public class TestPayloadSpans extends LuceneTestCase {
|
|||
directory = newDirectory();
|
||||
String[] docs = new String[]{"xx rr yy mm pp","xx yy mm rr pp", "nopayload qq ss pp np", "one two three four five six seven eight nine ten eleven", "nine one two three four five six seven eight eleven ten"};
|
||||
RandomIndexWriter writer = new RandomIndexWriter(random, directory,
|
||||
newIndexWriterConfig(TEST_VERSION_CURRENT, new PayloadAnalyzer()).setSimilarity(similarity));
|
||||
newIndexWriterConfig(TEST_VERSION_CURRENT, new PayloadAnalyzer()).setSimilarityProvider(similarity));
|
||||
|
||||
Document doc = null;
|
||||
for(int i = 0; i < docs.length; i++) {
|
||||
|
|
|
@ -20,9 +20,9 @@ package org.apache.lucene.search.spans;
|
|||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.CheckHits;
|
||||
import org.apache.lucene.search.Similarity;
|
||||
import org.apache.lucene.search.DefaultSimilarity;
|
||||
import org.apache.lucene.search.Scorer;
|
||||
import org.apache.lucene.search.SimilarityProvider;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.Weight.ScorerContext;
|
||||
|
@ -410,17 +410,17 @@ public class TestSpans extends LuceneTestCase {
|
|||
for (int i = 0; i < leaves.length; i++) {
|
||||
|
||||
|
||||
final Similarity sim = new DefaultSimilarity() {
|
||||
final SimilarityProvider sim = new DefaultSimilarity() {
|
||||
@Override
|
||||
public float sloppyFreq(int distance) {
|
||||
return 0.0f;
|
||||
}
|
||||
};
|
||||
|
||||
final Similarity oldSim = searcher.getSimilarity();
|
||||
final SimilarityProvider oldSim = searcher.getSimilarityProvider();
|
||||
Scorer spanScorer;
|
||||
try {
|
||||
searcher.setSimilarity(sim);
|
||||
searcher.setSimilarityProvider(sim);
|
||||
SpanNearQuery snq = new SpanNearQuery(
|
||||
new SpanQuery[] {
|
||||
makeSpanTermQuery("t1"),
|
||||
|
@ -430,7 +430,7 @@ public class TestSpans extends LuceneTestCase {
|
|||
|
||||
spanScorer = snq.weight(searcher).scorer(leaves[i], ScorerContext.def());
|
||||
} finally {
|
||||
searcher.setSimilarity(oldSim);
|
||||
searcher.setSimilarityProvider(oldSim);
|
||||
}
|
||||
if (i == subIndex) {
|
||||
assertTrue("first doc", spanScorer.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
|
||||
|
|
|
@ -20,7 +20,8 @@ package org.apache.solr.schema;
|
|||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.document.Fieldable;
|
||||
import org.apache.lucene.search.Similarity;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.SimilarityProvider;
|
||||
import org.apache.lucene.queryParser.QueryParser;
|
||||
import org.apache.lucene.util.Version;
|
||||
import org.apache.solr.common.ResourceLoader;
|
||||
|
@ -192,7 +193,7 @@ public final class IndexSchema {
|
|||
/**
|
||||
* Returns the SimilarityProvider used for this index
|
||||
*/
|
||||
public Similarity getSimilarity() { return similarityFactory.getSimilarity(); }
|
||||
public SimilarityProvider getSimilarityProvider() { return similarityFactory.getSimilarityProvider(); }
|
||||
|
||||
/**
|
||||
* Returns the SimilarityFactory used for this index
|
||||
|
@ -496,8 +497,8 @@ public final class IndexSchema {
|
|||
Node node = (Node) xpath.evaluate("/schema/similarity", document, XPathConstants.NODE);
|
||||
if (node==null) {
|
||||
similarityFactory = new SimilarityFactory() {
|
||||
public Similarity getSimilarity() {
|
||||
return Similarity.getDefault();
|
||||
public SimilarityProvider getSimilarityProvider() {
|
||||
return IndexSearcher.getDefaultSimilarityProvider();
|
||||
}
|
||||
};
|
||||
log.debug("using default similarity");
|
||||
|
@ -509,10 +510,10 @@ public final class IndexSchema {
|
|||
similarityFactory = (SimilarityFactory)obj;
|
||||
similarityFactory.init(params);
|
||||
} else {
|
||||
// just like always, assume it's a Similarlity and get a ClassCastException - reasonable error handling
|
||||
// just like always, assume it's a SimilarityProvider and get a ClassCastException - reasonable error handling
|
||||
similarityFactory = new SimilarityFactory() {
|
||||
public Similarity getSimilarity() {
|
||||
return (Similarity) obj;
|
||||
public SimilarityProvider getSimilarityProvider() {
|
||||
return (SimilarityProvider) obj;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
|
|
@ -16,7 +16,7 @@ package org.apache.solr.schema;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.search.Similarity;
|
||||
import org.apache.lucene.search.SimilarityProvider;
|
||||
import org.apache.solr.common.params.SolrParams;
|
||||
|
||||
public abstract class SimilarityFactory {
|
||||
|
@ -25,5 +25,5 @@ public abstract class SimilarityFactory {
|
|||
public void init(SolrParams params) { this.params = params; }
|
||||
public SolrParams getParams() { return params; }
|
||||
|
||||
public abstract Similarity getSimilarity();
|
||||
public abstract SimilarityProvider getSimilarityProvider();
|
||||
}
|
||||
|
|
|
@ -55,13 +55,11 @@ public class SolrConstantScoreQuery extends ConstantScoreQuery {
|
|||
}
|
||||
|
||||
protected class ConstantWeight extends Weight {
|
||||
private Similarity similarity;
|
||||
private float queryNorm;
|
||||
private float queryWeight;
|
||||
private Map context;
|
||||
|
||||
public ConstantWeight(IndexSearcher searcher) throws IOException {
|
||||
this.similarity = searcher.getSimilarity();
|
||||
this.context = ValueSource.newContext(searcher);
|
||||
if (filter instanceof SolrFilter)
|
||||
((SolrFilter)filter).createWeight(context, searcher);
|
||||
|
|
|
@ -132,7 +132,7 @@ public class SolrIndexSearcher extends IndexSearcher implements SolrInfoMBean {
|
|||
}
|
||||
|
||||
this.closeReader = closeReader;
|
||||
setSimilarity(schema.getSimilarity());
|
||||
setSimilarityProvider(schema.getSimilarityProvider());
|
||||
|
||||
SolrConfig solrConfig = core.getSolrConfig();
|
||||
queryResultWindowSize = solrConfig.queryResultWindowSize;
|
||||
|
|
|
@ -41,7 +41,7 @@ public class IDFValueSource extends DocFreqValueSource {
|
|||
@Override
|
||||
public DocValues getValues(Map context, AtomicReaderContext readerContext) throws IOException {
|
||||
IndexSearcher searcher = (IndexSearcher)context.get("searcher");
|
||||
Similarity sim = searcher.getSimilarity();
|
||||
Similarity sim = searcher.getSimilarityProvider().get(field);
|
||||
// todo: we need docFreq that takes a BytesRef
|
||||
String strVal = ByteUtils.UTF8toUTF16(indexedBytes);
|
||||
int docfreq = searcher.docFreq(new Term(indexedField, strVal));
|
||||
|
|
|
@ -46,7 +46,7 @@ public class NormValueSource extends ValueSource {
|
|||
@Override
|
||||
public DocValues getValues(Map context, AtomicReaderContext readerContext) throws IOException {
|
||||
IndexSearcher searcher = (IndexSearcher)context.get("searcher");
|
||||
final Similarity similarity = searcher.getSimilarity();
|
||||
final Similarity similarity = searcher.getSimilarityProvider().get(field);
|
||||
final byte[] norms = readerContext.reader.norms(field);
|
||||
if (norms == null) {
|
||||
return new ConstDoubleDocValues(0.0, this);
|
||||
|
|
|
@ -25,7 +25,7 @@ public class TFValueSource extends TermFreqValueSource {
|
|||
public DocValues getValues(Map context, AtomicReaderContext readerContext) throws IOException {
|
||||
Fields fields = readerContext.reader.fields();
|
||||
final Terms terms = fields.terms(field);
|
||||
final Similarity similarity = ((IndexSearcher)context.get("searcher")).getSimilarity();
|
||||
final Similarity similarity = ((IndexSearcher)context.get("searcher")).getSimilarityProvider().get(field);
|
||||
|
||||
return new FloatDocValues(this) {
|
||||
DocsEnum docs ;
|
||||
|
|
|
@ -153,7 +153,7 @@ public class SolrIndexConfig {
|
|||
if (writeLockTimeout != -1)
|
||||
iwc.setWriteLockTimeout(writeLockTimeout);
|
||||
|
||||
iwc.setSimilarity(schema.getSimilarity());
|
||||
iwc.setSimilarityProvider(schema.getSimilarityProvider());
|
||||
iwc.setMergePolicy(buildMergePolicy(schema));
|
||||
iwc.setMergeScheduler(buildMergeScheduler(schema));
|
||||
|
||||
|
|
|
@ -16,10 +16,10 @@
|
|||
*/
|
||||
package org.apache.solr.schema;
|
||||
|
||||
import org.apache.lucene.search.Similarity;
|
||||
import org.apache.lucene.search.SimilarityProvider;
|
||||
|
||||
public class CustomSimilarityFactory extends SimilarityFactory {
|
||||
public Similarity getSimilarity() {
|
||||
public SimilarityProvider getSimilarityProvider() {
|
||||
return new MockConfigurableSimilarity(params.get("echo"));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -27,7 +27,7 @@ import org.apache.solr.common.params.MapSolrParams;
|
|||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.request.LocalSolrQueryRequest;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.apache.lucene.search.Similarity;
|
||||
import org.apache.lucene.search.SimilarityProvider;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
||||
|
@ -83,7 +83,7 @@ public class IndexSchemaTest extends SolrTestCaseJ4 {
|
|||
@Test
|
||||
public void testSimilarityFactory() {
|
||||
SolrCore core = h.getCore();
|
||||
Similarity similarity = core.getSchema().getSimilarity();
|
||||
SimilarityProvider similarity = core.getSchema().getSimilarityProvider();
|
||||
assertTrue("wrong class", similarity instanceof MockConfigurableSimilarity);
|
||||
assertEquals("is there an echo?", ((MockConfigurableSimilarity)similarity).getPassthrough());
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue