LUCENE-2236: per-field similarity

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1062927 13f79535-47bb-0310-9956-ffa450edef68
Robert Muir 2011-01-24 19:13:31 +00:00
parent 25f5488fd2
commit 3f255f6cea
64 changed files with 524 additions and 290 deletions

View File

@ -131,6 +131,9 @@ Changes in backwards compatibility policy
* LUCENE-2882: Cut over SpanQuery#getSpans to AtomicReaderContext to enforce
per segment semantics on SpanQuery & Spans. (Simon Willnauer)
* LUCENE-2236: Similarity can now be configured on a per-field basis. See the
migration notes in MIGRATE.txt for more details. (Robert Muir, Doron Cohen)
Changes in Runtime Behavior
* LUCENE-2846: omitNorms now behaves like omitTermFrequencyAndPositions, if you

View File

@ -331,3 +331,9 @@ LUCENE-1458, LUCENE-2111: Flexible Indexing
toString() is no longer implemented by AttributeImpl, so if you have overridden
toString(), port your customization over to reflectWith(). reflectAsString() would
then return what toString() did before.
* LUCENE-2236: DefaultSimilarity can no longer be set statically (and dangerously) for the entire JVM.
Instead, IndexWriterConfig and IndexSearcher now take a SimilarityProvider.
Similarity can now be configured on a per-field basis.
Similarity retains only the field-specific relevance methods such as tf() and idf().
Methods that apply to the entire query such as coord() and queryNorm() exist in SimilarityProvider.
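As a minimal migration sketch (variable names such as analyzer, dir and reader are assumptions for the example): DefaultSimilarity itself implements SimilarityProvider and returns itself for every field, so a single provider can be shared by indexing and search.

  import org.apache.lucene.index.IndexWriter;
  import org.apache.lucene.index.IndexWriterConfig;
  import org.apache.lucene.search.DefaultSimilarity;
  import org.apache.lucene.search.IndexSearcher;
  import org.apache.lucene.search.SimilarityProvider;
  import org.apache.lucene.util.Version;

  SimilarityProvider provider = new DefaultSimilarity();

  // indexing side: replaces the old static Similarity.setDefault(...)
  IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_40, analyzer)
      .setSimilarityProvider(provider);
  IndexWriter writer = new IndexWriter(dir, conf);

  // search side: use the same provider so norms and query scoring agree
  IndexSearcher searcher = new IndexSearcher(reader);
  searcher.setSimilarityProvider(provider);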

View File

@ -42,7 +42,8 @@ import org.apache.lucene.index.FieldInvertState;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermVectorOffsetInfo;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.SimilarityProvider;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.CollectionUtil;
@ -67,7 +68,7 @@ public class InstantiatedIndexWriter implements Closeable {
private final InstantiatedIndex index;
private final Analyzer analyzer;
private Similarity similarity = Similarity.getDefault(); // how to normalize;
private SimilarityProvider similarityProvider = IndexSearcher.getDefaultSimilarityProvider(); // how to normalize;
private transient Set<String> fieldNameBuffer;
/**
@ -236,11 +237,12 @@ public class InstantiatedIndexWriter implements Closeable {
termsInDocument += eFieldTermDocInfoFactoriesByTermText.getValue().size();
if (eFieldTermDocInfoFactoriesByTermText.getKey().indexed && !eFieldTermDocInfoFactoriesByTermText.getKey().omitNorms) {
final String fieldName = eFieldTermDocInfoFactoriesByTermText.getKey().fieldName;
final FieldInvertState invertState = new FieldInvertState();
invertState.setBoost(eFieldTermDocInfoFactoriesByTermText.getKey().boost * document.getDocument().getBoost());
invertState.setLength(eFieldTermDocInfoFactoriesByTermText.getKey().fieldLength);
final float norm = similarity.computeNorm(eFieldTermDocInfoFactoriesByTermText.getKey().fieldName, invertState);
normsByFieldNameAndDocumentNumber.get(eFieldTermDocInfoFactoriesByTermText.getKey().fieldName)[document.getDocumentNumber()] = similarity.encodeNormValue(norm);
final float norm = similarityProvider.get(fieldName).computeNorm(fieldName, invertState);
normsByFieldNameAndDocumentNumber.get(fieldName)[document.getDocumentNumber()] = similarityProvider.get(fieldName).encodeNormValue(norm);
} else {
System.currentTimeMillis();
}
@ -659,12 +661,12 @@ public class InstantiatedIndexWriter implements Closeable {
addDocument(doc, analyzer);
}
public Similarity getSimilarity() {
return similarity;
public SimilarityProvider getSimilarityProvider() {
return similarityProvider;
}
public void setSimilarity(Similarity similarity) {
this.similarity = similarity;
public void setSimilarityProvider(SimilarityProvider similarityProvider) {
this.similarityProvider = similarityProvider;
}
public Analyzer getAnalyzer() {

View File

@ -57,6 +57,7 @@ import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.search.SimilarityProvider;
import org.apache.lucene.store.RAMDirectory; // for javadocs
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
@ -1169,9 +1170,9 @@ public class MemoryIndex implements Serializable {
};
}
private Similarity getSimilarity() {
if (searcher != null) return searcher.getSimilarity();
return Similarity.getDefault();
private SimilarityProvider getSimilarityProvider() {
if (searcher != null) return searcher.getSimilarityProvider();
return IndexSearcher.getDefaultSimilarityProvider();
}
private void setSearcher(IndexSearcher searcher) {
@ -1181,20 +1182,21 @@ public class MemoryIndex implements Serializable {
/** performance hack: cache norms to avoid repeated expensive calculations */
private byte[] cachedNorms;
private String cachedFieldName;
private Similarity cachedSimilarity;
private SimilarityProvider cachedSimilarity;
@Override
public byte[] norms(String fieldName) {
byte[] norms = cachedNorms;
Similarity sim = getSimilarity();
SimilarityProvider sim = getSimilarityProvider();
if (fieldName != cachedFieldName || sim != cachedSimilarity) { // not cached?
Info info = getInfo(fieldName);
Similarity fieldSim = sim.get(fieldName);
int numTokens = info != null ? info.numTokens : 0;
int numOverlapTokens = info != null ? info.numOverlapTokens : 0;
float boost = info != null ? info.getBoost() : 1.0f;
FieldInvertState invertState = new FieldInvertState(0, numTokens, numOverlapTokens, 0, boost);
float n = sim.computeNorm(fieldName, invertState);
byte norm = sim.encodeNormValue(n);
float n = fieldSim.computeNorm(fieldName, invertState);
byte norm = fieldSim.encodeNormValue(n);
norms = new byte[] {norm};
// cache it for future reuse

View File

@ -24,6 +24,7 @@ import java.util.ArrayList;
import org.apache.lucene.search.DefaultSimilarity;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.search.SimilarityProvider;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.StringHelper;
@ -57,13 +58,13 @@ public class FieldNormModifier {
System.exit(1);
}
Similarity s = null;
SimilarityProvider s = null;
if (args[1].equals("-d"))
args[1] = DefaultSimilarity.class.getName();
try {
s = Class.forName(args[1]).asSubclass(Similarity.class).newInstance();
s = Class.forName(args[1]).asSubclass(SimilarityProvider.class).newInstance();
} catch (Exception e) {
System.err.println("Couldn't instantiate similarity with empty constructor: " + args[1]);
e.printStackTrace(System.err);
@ -84,7 +85,7 @@ public class FieldNormModifier {
private Directory dir;
private Similarity sim;
private SimilarityProvider sim;
/**
* Constructor for code that wishes to use this class programmatically
@ -93,7 +94,7 @@ public class FieldNormModifier {
* @param d the Directory to modify
* @param s the Similarity to use (can be null)
*/
public FieldNormModifier(Directory d, Similarity s) {
public FieldNormModifier(Directory d, SimilarityProvider s) {
dir = d;
sim = s;
}
@ -111,7 +112,7 @@ public class FieldNormModifier {
*/
public void reSetNorms(String field) throws IOException {
String fieldName = StringHelper.intern(field);
Similarity fieldSim = sim.get(field);
IndexReader reader = null;
try {
reader = IndexReader.open(dir, false);
@ -148,7 +149,7 @@ public class FieldNormModifier {
for (int d = 0; d < termCounts.length; d++) {
if (delDocs == null || !delDocs.get(d)) {
invertState.setLength(termCounts[d]);
subReader.setNorm(d, fieldName, sim.encodeNormValue(sim.computeNorm(fieldName, invertState)));
subReader.setNorm(d, fieldName, fieldSim.encodeNormValue(fieldSim.computeNorm(fieldName, invertState)));
}
}
}

View File

@ -28,7 +28,7 @@ import org.apache.lucene.search.Collector;
import org.apache.lucene.search.DefaultSimilarity;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.search.SimilarityProvider;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
@ -42,7 +42,7 @@ public class TestFieldNormModifier extends LuceneTestCase {
public Directory store;
/** inverts the normal notion of lengthNorm */
public static Similarity s = new DefaultSimilarity() {
public static SimilarityProvider s = new DefaultSimilarity() {
@Override
public float computeNorm(String fieldName, FieldInvertState state) {
return state.getBoost() * (discountOverlaps ? state.getLength() - state.getNumOverlap() : state.getLength());

View File

@ -33,7 +33,7 @@ import org.apache.lucene.search.Collector;
import org.apache.lucene.search.DefaultSimilarity;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.search.SimilarityProvider;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
@ -47,7 +47,7 @@ public class TestLengthNormModifier extends LuceneTestCase {
public Directory store;
/** inverts the normal notion of lengthNorm */
public static Similarity s = new DefaultSimilarity() {
public static SimilarityProvider s = new DefaultSimilarity() {
@Override
public float computeNorm(String fieldName, FieldInvertState state) {
return state.getBoost() * (discountOverlaps ? state.getLength() - state.getNumOverlap() : state.getLength());
@ -163,7 +163,7 @@ public class TestLengthNormModifier extends LuceneTestCase {
}
// override the norms to be inverted
Similarity s = new DefaultSimilarity() {
SimilarityProvider s = new DefaultSimilarity() {
@Override
public float computeNorm(String fieldName, FieldInvertState state) {
return state.getBoost() * (discountOverlaps ? state.getLength() - state.getNumOverlap() : state.getLength());

View File

@ -31,7 +31,7 @@ import org.apache.lucene.queryParser.standard.parser.EscapeQuerySyntaxImpl;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.search.SimilarityProvider;
import org.apache.lucene.search.BooleanQuery.TooManyClauses;
/**
@ -41,7 +41,7 @@ import org.apache.lucene.search.BooleanQuery.TooManyClauses;
*
* @see BooleanQueryNodeBuilder
* @see BooleanQuery
* @see Similarity#coord(int, int)
* @see SimilarityProvider#coord(int, int)
*/
public class StandardBooleanQueryNodeBuilder implements StandardQueryBuilder {

View File

@ -22,14 +22,14 @@ import java.util.List;
import org.apache.lucene.queryParser.core.nodes.BooleanQueryNode;
import org.apache.lucene.queryParser.core.nodes.QueryNode;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.search.SimilarityProvider;
/**
* A {@link StandardBooleanQueryNode} has the same behavior as
* {@link BooleanQueryNode}. It only indicates if the coord should be enabled or
* not for this boolean query. <br/>
*
* @see Similarity#coord(int, int)
* @see SimilarityProvider#coord(int, int)
* @see BooleanQuery
*/
public class StandardBooleanQueryNode extends BooleanQueryNode {

View File

@ -30,7 +30,7 @@ import java.util.concurrent.atomic.AtomicLong;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.search.SimilarityProvider;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMFile;
@ -127,7 +127,7 @@ final class DocumentsWriter {
private boolean aborting; // True if an abort is pending
PrintStream infoStream;
Similarity similarity;
SimilarityProvider similarityProvider;
// max # simultaneous threads; if there are more than
// this, they wait for others to finish first
@ -140,7 +140,7 @@ final class DocumentsWriter {
DocumentsWriter docWriter;
Analyzer analyzer;
PrintStream infoStream;
Similarity similarity;
SimilarityProvider similarityProvider;
int docID;
Document doc;
String maxTermPrefix;
@ -284,7 +284,7 @@ final class DocumentsWriter {
DocumentsWriter(Directory directory, IndexWriter writer, IndexingChain indexingChain, int maxThreadStates, FieldInfos fieldInfos, BufferedDeletes bufferedDeletes) throws IOException {
this.directory = directory;
this.writer = writer;
this.similarity = writer.getConfig().getSimilarity();
this.similarityProvider = writer.getConfig().getSimilarityProvider();
this.maxThreadStates = maxThreadStates;
this.fieldInfos = fieldInfos;
this.bufferedDeletes = bufferedDeletes;
@ -357,10 +357,10 @@ final class DocumentsWriter {
}
}
synchronized void setSimilarity(Similarity similarity) {
this.similarity = similarity;
synchronized void setSimilarityProvider(SimilarityProvider similarity) {
this.similarityProvider = similarity;
for(int i=0;i<threadStates.length;i++) {
threadStates[i].docState.similarity = similarity;
threadStates[i].docState.similarityProvider = similarity;
}
}

View File

@ -36,7 +36,7 @@ final class DocumentsWriterThreadState {
this.docWriter = docWriter;
docState = new DocumentsWriter.DocState();
docState.infoStream = docWriter.infoStream;
docState.similarity = docWriter.similarity;
docState.similarityProvider = docWriter.similarityProvider;
docState.docWriter = docWriter;
consumer = docWriter.consumer.addThread(this);
}

View File

@ -21,7 +21,8 @@ import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.DocumentsWriter.IndexingChain;
import org.apache.lucene.index.IndexWriter.IndexReaderWarmer;
import org.apache.lucene.index.codecs.CodecProvider;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.SimilarityProvider;
import org.apache.lucene.util.Version;
/**
@ -111,7 +112,7 @@ public final class IndexWriterConfig implements Cloneable {
private IndexDeletionPolicy delPolicy;
private IndexCommit commit;
private OpenMode openMode;
private Similarity similarity;
private SimilarityProvider similarityProvider;
private int termIndexInterval; // TODO: this should be private to the codec, not settable here
private MergeScheduler mergeScheduler;
private long writeLockTimeout;
@ -142,7 +143,7 @@ public final class IndexWriterConfig implements Cloneable {
delPolicy = new KeepOnlyLastCommitDeletionPolicy();
commit = null;
openMode = OpenMode.CREATE_OR_APPEND;
similarity = Similarity.getDefault();
similarityProvider = IndexSearcher.getDefaultSimilarityProvider();
termIndexInterval = DEFAULT_TERM_INDEX_INTERVAL; // TODO: this should be private to the codec, not settable here
mergeScheduler = new ConcurrentMergeScheduler();
writeLockTimeout = WRITE_LOCK_TIMEOUT;
@ -234,25 +235,22 @@ public final class IndexWriterConfig implements Cloneable {
}
/**
* Expert: set the {@link Similarity} implementation used by this IndexWriter.
* Expert: set the {@link SimilarityProvider} implementation used by this IndexWriter.
* <p>
* <b>NOTE:</b> the similarity cannot be null. If <code>null</code> is passed,
* the similarity will be set to the default.
*
* @see Similarity#setDefault(Similarity)
* <b>NOTE:</b> the similarity provider cannot be null. If <code>null</code> is passed,
* the similarity provider will be set to the default implementation (unspecified).
*/
public IndexWriterConfig setSimilarity(Similarity similarity) {
this.similarity = similarity == null ? Similarity.getDefault() : similarity;
public IndexWriterConfig setSimilarityProvider(SimilarityProvider similarityProvider) {
this.similarityProvider = similarityProvider == null ? IndexSearcher.getDefaultSimilarityProvider() : similarityProvider;
return this;
}
/**
* Expert: returns the {@link Similarity} implementation used by this
* IndexWriter. This defaults to the current value of
* {@link Similarity#getDefault()}.
* Expert: returns the {@link SimilarityProvider} implementation used by this
* IndexWriter.
*/
public Similarity getSimilarity() {
return similarity;
public SimilarityProvider getSimilarityProvider() {
return similarityProvider;
}
/**
@ -576,7 +574,7 @@ public final class IndexWriterConfig implements Cloneable {
sb.append("delPolicy=").append(delPolicy.getClass().getName()).append("\n");
sb.append("commit=").append(commit == null ? "null" : commit).append("\n");
sb.append("openMode=").append(openMode).append("\n");
sb.append("similarity=").append(similarity.getClass().getName()).append("\n");
sb.append("similarityProvider=").append(similarityProvider.getClass().getName()).append("\n");
sb.append("termIndexInterval=").append(termIndexInterval).append("\n"); // TODO: this should be private to the codec, not settable here
sb.append("mergeScheduler=").append(mergeScheduler.getClass().getName()).append("\n");
sb.append("default WRITE_LOCK_TIMEOUT=").append(WRITE_LOCK_TIMEOUT).append("\n");

View File

@ -17,6 +17,7 @@ package org.apache.lucene.index;
* limitations under the License.
*/
import org.apache.lucene.search.Similarity;
import org.apache.lucene.util.ArrayUtil;
/** Taps into DocInverter, as an InvertedDocEndConsumer,
@ -29,7 +30,8 @@ final class NormsWriterPerField extends InvertedDocEndConsumerPerField implement
final NormsWriterPerThread perThread;
final FieldInfo fieldInfo;
final DocumentsWriter.DocState docState;
final Similarity similarity;
// Holds all docID/norm pairs we've seen
int[] docIDs = new int[1];
byte[] norms = new byte[1];
@ -49,6 +51,7 @@ final class NormsWriterPerField extends InvertedDocEndConsumerPerField implement
this.fieldInfo = fieldInfo;
docState = perThread.docState;
fieldState = docInverterPerField.fieldState;
similarity = docState.similarityProvider.get(fieldInfo.name);
}
@Override
@ -71,8 +74,8 @@ final class NormsWriterPerField extends InvertedDocEndConsumerPerField implement
assert norms.length == upto;
norms = ArrayUtil.grow(norms, 1+upto);
}
final float norm = docState.similarity.computeNorm(fieldInfo.name, fieldState);
norms[upto] = docState.similarity.encodeNormValue(norm);
final float norm = similarity.computeNorm(fieldInfo.name, fieldState);
norms[upto] = similarity.encodeNormValue(norm);
docIDs[upto] = docState.docID;
upto++;
}

View File

@ -72,18 +72,18 @@ public class BooleanQuery extends Query implements Iterable<BooleanClause> {
/** Constructs an empty boolean query.
*
* {@link Similarity#coord(int,int)} may be disabled in scoring, as
* {@link SimilarityProvider#coord(int,int)} may be disabled in scoring, as
* appropriate. For example, this score factor does not make sense for most
* automatically generated queries, like {@link WildcardQuery} and {@link
* FuzzyQuery}.
*
* @param disableCoord disables {@link Similarity#coord(int,int)} in scoring.
* @param disableCoord disables {@link SimilarityProvider#coord(int,int)} in scoring.
*/
public BooleanQuery(boolean disableCoord) {
this.disableCoord = disableCoord;
}
/** Returns true iff {@link Similarity#coord(int,int)} is disabled in
/** Returns true iff {@link SimilarityProvider#coord(int,int)} is disabled in
* scoring for this query instance.
* @see #BooleanQuery(boolean)
*/
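As a hypothetical use of the constructor above (the field and terms are invented for the example): a synonym-style expansion where matching one variant should not score lower than matching both, so coord is disabled.

  import org.apache.lucene.index.Term;
  import org.apache.lucene.search.BooleanClause;
  import org.apache.lucene.search.BooleanQuery;
  import org.apache.lucene.search.TermQuery;

  // automatically generated expansion: disable SimilarityProvider#coord
  BooleanQuery expanded = new BooleanQuery(true);
  expanded.add(new TermQuery(new Term("body", "color")), BooleanClause.Occur.SHOULD);
  expanded.add(new TermQuery(new Term("body", "colour")), BooleanClause.Occur.SHOULD);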
@ -162,14 +162,14 @@ public class BooleanQuery extends Query implements Iterable<BooleanClause> {
*/
protected class BooleanWeight extends Weight {
/** The Similarity implementation. */
protected Similarity similarity;
protected SimilarityProvider similarityProvider;
protected ArrayList<Weight> weights;
protected int maxCoord; // num optional + num required
private final boolean disableCoord;
public BooleanWeight(IndexSearcher searcher, boolean disableCoord)
throws IOException {
this.similarity = searcher.getSimilarity();
this.similarityProvider = searcher.getSimilarityProvider();
this.disableCoord = disableCoord;
weights = new ArrayList<Weight>(clauses.size());
for (int i = 0 ; i < clauses.size(); i++) {
@ -202,7 +202,7 @@ public class BooleanQuery extends Query implements Iterable<BooleanClause> {
}
public float coord(int overlap, int maxOverlap) {
return similarity.coord(overlap, maxOverlap);
return similarityProvider.coord(overlap, maxOverlap);
}
@Override

View File

@ -20,7 +20,7 @@ import org.apache.lucene.index.FieldInvertState;
*/
/** Expert: Default scoring implementation. */
public class DefaultSimilarity extends Similarity {
public class DefaultSimilarity extends Similarity implements SimilarityProvider {
/** Implemented as
* <code>state.getBoost()*lengthNorm(numTerms)</code>, where
@ -41,7 +41,6 @@ public class DefaultSimilarity extends Similarity {
}
/** Implemented as <code>1/sqrt(sumOfSquaredWeights)</code>. */
@Override
public float queryNorm(float sumOfSquaredWeights) {
return (float)(1.0 / Math.sqrt(sumOfSquaredWeights));
}
@ -65,7 +64,6 @@ public class DefaultSimilarity extends Similarity {
}
/** Implemented as <code>overlap / maxOverlap</code>. */
@Override
public float coord(int overlap, int maxOverlap) {
return overlap / (float)maxOverlap;
}
@ -90,4 +88,12 @@ public class DefaultSimilarity extends Similarity {
public boolean getDiscountOverlaps() {
return discountOverlaps;
}
/**
* Returns this default implementation for all fields.
* Override this method to customize scoring on a per-field basis.
*/
public Similarity get(String field) {
return this;
}
}
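A sketch of what this hook enables (the "title" field and the flat-norm formula are invented for the example):

  import org.apache.lucene.index.FieldInvertState;
  import org.apache.lucene.search.DefaultSimilarity;
  import org.apache.lucene.search.Similarity;

  public class PerFieldDefaultSimilarity extends DefaultSimilarity {
    // flat norms for titles: ignore field length, keep the boost
    private final Similarity titleSimilarity = new DefaultSimilarity() {
      @Override
      public float computeNorm(String fieldName, FieldInvertState state) {
        return state.getBoost();
      }
    };

    @Override
    public Similarity get(String field) {
      return "title".equals(field) ? titleSimilarity : this;
    }
  }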

View File

@ -70,8 +70,22 @@ public class IndexSearcher {
private final ExecutorService executor;
protected final IndexSearcher[] subSearchers;
/** The Similarity implementation used by this searcher. */
private Similarity similarity = Similarity.getDefault();
// the default SimilarityProvider
private static final SimilarityProvider defaultProvider = new DefaultSimilarity();
/**
* Expert: returns a default SimilarityProvider instance.
* In general, this method is only called to initialize searchers and writers.
* User code and query implementations should respect
* {@link IndexSearcher#getSimilarityProvider()}.
* @lucene.internal
*/
public static SimilarityProvider getDefaultSimilarityProvider() {
return defaultProvider;
}
/** The SimilarityProvider implementation used by this searcher. */
private SimilarityProvider similarityProvider = defaultProvider;
/** Creates a searcher searching the index in the named
* directory, with readOnly=true
@ -248,16 +262,15 @@ public class IndexSearcher {
return reader.document(docID, fieldSelector);
}
/** Expert: Set the Similarity implementation used by this Searcher.
/** Expert: Set the SimilarityProvider implementation used by this Searcher.
*
* @see Similarity#setDefault(Similarity)
*/
public void setSimilarity(Similarity similarity) {
this.similarity = similarity;
public void setSimilarityProvider(SimilarityProvider similarityProvider) {
this.similarityProvider = similarityProvider;
}
public Similarity getSimilarity() {
return similarity;
public SimilarityProvider getSimilarityProvider() {
return similarityProvider;
}
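The consumer-side pattern, used by the Weight constructors later in this commit (TermQuery, PhraseQuery, SpanWeight), is to resolve the Similarity for the specific field being scored through the searcher's provider. A minimal sketch (class and method names invented):

  import org.apache.lucene.search.IndexSearcher;
  import org.apache.lucene.search.Similarity;

  final class FieldSimilarityLookup {
    // weights no longer read a JVM-wide default; they ask the provider
    static Similarity forField(IndexSearcher searcher, String field) {
      return searcher.getSimilarityProvider().get(field);
    }
  }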
/**

View File

@ -98,7 +98,7 @@ public class MatchAllDocsQuery extends Query {
private float queryNorm;
public MatchAllDocsWeight(IndexSearcher searcher) {
this.similarity = searcher.getSimilarity();
this.similarity = normsField == null ? null : searcher.getSimilarityProvider().get(normsField);
}
@Override

View File

@ -139,7 +139,7 @@ public class MultiPhraseQuery extends Query {
public MultiPhraseWeight(IndexSearcher searcher)
throws IOException {
this.similarity = searcher.getSimilarity();
this.similarity = searcher.getSimilarityProvider().get(field);
// compute idf
ArrayList<Term> allTerms = new ArrayList<Term>();

View File

@ -146,7 +146,7 @@ public class PhraseQuery extends Query {
public PhraseWeight(IndexSearcher searcher)
throws IOException {
this.similarity = searcher.getSimilarity();
this.similarity = searcher.getSimilarityProvider().get(field);
idfExp = similarity.idfExplain(terms, searcher);
idf = idfExp.getIdf();

View File

@ -98,7 +98,7 @@ public abstract class Query implements java.io.Serializable, Cloneable {
Query query = searcher.rewrite(this);
Weight weight = query.createWeight(searcher);
float sum = weight.sumOfSquaredWeights();
float norm = searcher.getSimilarity().queryNorm(sum);
float norm = searcher.getSimilarityProvider().queryNorm(sum);
if (Float.isInfinite(norm) || Float.isNaN(norm))
norm = 1.0f;
weight.normalize(norm);

View File

@ -362,7 +362,7 @@ import org.apache.lucene.util.SmallFloat;
* Typically, a document that contains more of the query's terms will receive a higher score
* than another document with fewer query terms.
* This is a search time factor computed in
* {@link #coord(int, int) coord(q,d)}
* {@link SimilarityProvider#coord(int, int) coord(q,d)}
* by the Similarity in effect at search time.
* <br>&nbsp;<br>
* </li>
@ -522,40 +522,13 @@ import org.apache.lucene.util.SmallFloat;
* </li>
* </ol>
*
* @see #setDefault(Similarity)
* @see org.apache.lucene.index.IndexWriterConfig#setSimilarity(Similarity)
* @see IndexSearcher#setSimilarity(Similarity)
* @see org.apache.lucene.index.IndexWriterConfig#setSimilarityProvider(SimilarityProvider)
* @see IndexSearcher#setSimilarityProvider(SimilarityProvider)
*/
public abstract class Similarity implements Serializable {
/**
* The Similarity implementation used by default.
**/
private static Similarity defaultImpl = new DefaultSimilarity();
public static final int NO_DOC_ID_PROVIDED = -1;
/** Set the default Similarity implementation used by indexing and search
* code.
*
* @see IndexSearcher#setSimilarity(Similarity)
* @see org.apache.lucene.index.IndexWriterConfig#setSimilarity(Similarity)
*/
public static void setDefault(Similarity similarity) {
Similarity.defaultImpl = similarity;
}
/** Return the default Similarity implementation used by indexing and search
* code.
*
* <p>This is initially an instance of {@link DefaultSimilarity}.
*
* @see IndexSearcher#setSimilarity(Similarity)
* @see org.apache.lucene.index.IndexWriterConfig#setSimilarity(Similarity)
*/
public static Similarity getDefault() {
return Similarity.defaultImpl;
}
/** Cache of decoded bytes. */
private static final float[] NORM_TABLE = new float[256];
@ -632,21 +605,6 @@ public abstract class Similarity implements Serializable {
throw new UnsupportedOperationException("please use computeNorm instead");
}
/** Computes the normalization value for a query given the sum of the squared
* weights of each of the query terms. This value is multiplied into the
* weight of each query term. While the classic query normalization factor is
* computed as 1/sqrt(sumOfSquaredWeights), other implementations might
* completely ignore sumOfSquaredWeights (ie return 1).
*
* <p>This does not affect ranking, but the default implementation does make scores
* from different queries more comparable than they would be by eliminating the
* magnitude of the Query vector as a factor in the score.
*
* @param sumOfSquaredWeights the sum of the squares of query term weights
* @return a normalization factor for query weights
*/
public abstract float queryNorm(float sumOfSquaredWeights);
/** Encodes a normalization factor for storage in an index.
*
* <p>The encoding uses a three-bit mantissa, a five-bit exponent, and
@ -816,20 +774,6 @@ public abstract class Similarity implements Serializable {
*/
public abstract float idf(int docFreq, int numDocs);
/** Computes a score factor based on the fraction of all query terms that a
* document contains. This value is multiplied into scores.
*
* <p>The presence of a large portion of the query terms indicates a better
* match with the query, so implementations of this method usually return
* larger values when the ratio between these parameters is large and smaller
* values when the ratio between them is small.
*
* @param overlap the number of query terms matched in the document
* @param maxOverlap the total number of terms in the query
* @return a score factor based on term overlap with the query
*/
public abstract float coord(int overlap, int maxOverlap);
/**
* Calculate a scoring factor based on the data in the payload. Overriding implementations
* are responsible for interpreting what is in the payload. Lucene makes no assumptions about

View File

@ -0,0 +1,66 @@
package org.apache.lucene.search;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Expert: Scoring API.
*
* Provides top-level scoring functions that aren't specific to a field,
* and work across multi-field queries (such as {@link BooleanQuery}).
*
* Field-specific scoring is accomplished through {@link Similarity}.
*
* @lucene.experimental
*/
public interface SimilarityProvider {
/** Computes a score factor based on the fraction of all query terms that a
* document contains. This value is multiplied into scores.
*
* <p>The presence of a large portion of the query terms indicates a better
* match with the query, so implementations of this method usually return
* larger values when the ratio between these parameters is large and smaller
* values when the ratio between them is small.
*
* @param overlap the number of query terms matched in the document
* @param maxOverlap the total number of terms in the query
* @return a score factor based on term overlap with the query
*/
public abstract float coord(int overlap, int maxOverlap);
/** Computes the normalization value for a query given the sum of the squared
* weights of each of the query terms. This value is multiplied into the
* weight of each query term. While the classic query normalization factor is
* computed as 1/sqrt(sumOfSquaredWeights), other implementations might
* completely ignore sumOfSquaredWeights (ie return 1).
*
* <p>This does not affect ranking, but the default implementation does make scores
* from different queries more comparable than they would be by eliminating the
* magnitude of the Query vector as a factor in the score.
*
* @param sumOfSquaredWeights the sum of the squares of query term weights
* @return a normalization factor for query weights
*/
public abstract float queryNorm(float sumOfSquaredWeights);
/** Returns a {@link Similarity} for scoring a field
* @param field field name.
* @return a field-specific Similarity.
*/
public abstract Similarity get(String field);
}
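A sketch of implementing the interface directly instead of extending DefaultSimilarity (the map-based routing and class name are invented; the coord and queryNorm bodies mirror the classic DefaultSimilarity formulas shown above):

  import java.util.HashMap;
  import java.util.Map;
  import org.apache.lucene.search.DefaultSimilarity;
  import org.apache.lucene.search.Similarity;
  import org.apache.lucene.search.SimilarityProvider;

  public class MapSimilarityProvider implements SimilarityProvider {
    private final Map<String,Similarity> perField = new HashMap<String,Similarity>();
    private final Similarity defaultSimilarity = new DefaultSimilarity();

    public void register(String field, Similarity similarity) {
      perField.put(field, similarity);
    }

    // query-level factors are shared across all fields
    public float coord(int overlap, int maxOverlap) {
      return overlap / (float) maxOverlap;
    }

    public float queryNorm(float sumOfSquaredWeights) {
      return (float) (1.0 / Math.sqrt(sumOfSquaredWeights));
    }

    // field-level scoring is routed per field
    public Similarity get(String field) {
      Similarity similarity = perField.get(field);
      return similarity == null ? defaultSimilarity : similarity;
    }
  }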

View File

@ -54,7 +54,7 @@ public class TermQuery extends Query {
throws IOException {
assert termStates != null : "PerReaderTermState must not be null";
this.termStates = termStates;
this.similarity = searcher.getSimilarity();
this.similarity = searcher.getSimilarityProvider().get(term.field());
if (docFreq != -1) {
idfExp = similarity.idfExplain(term, searcher, docFreq);
} else {

View File

@ -44,7 +44,7 @@ import org.apache.lucene.index.IndexReader.ReaderContext;
* <code>IndexSearcher</code> ({@link Query#createWeight(IndexSearcher)}).
* <li>The {@link #sumOfSquaredWeights()} method is called on the
* <code>Weight</code> to compute the query normalization factor
* {@link Similarity#queryNorm(float)} of the query clauses contained in the
* {@link SimilarityProvider#queryNorm(float)} of the query clauses contained in the
* query.
* <li>The query normalization factor is passed to {@link #normalize(float)}. At
* this point the weighting is complete.

View File

@ -42,7 +42,7 @@ public class SpanWeight extends Weight {
public SpanWeight(SpanQuery query, IndexSearcher searcher)
throws IOException {
this.similarity = searcher.getSimilarity();
this.similarity = searcher.getSimilarityProvider().get(query.getField());
this.query = query;
terms=new HashSet<Term>();

View File

@ -28,7 +28,7 @@ import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.search.SimilarityProvider;
import org.apache.lucene.store.Directory;
import static org.apache.lucene.util.LuceneTestCase.TEST_VERSION_CURRENT;
@ -220,7 +220,7 @@ class DocHelper {
*/
public static SegmentInfo writeDoc(Directory dir, Document doc) throws IOException
{
return writeDoc(dir, new MockAnalyzer(MockTokenizer.WHITESPACE, false), Similarity.getDefault(), doc);
return writeDoc(dir, new MockAnalyzer(MockTokenizer.WHITESPACE, false), null, doc);
}
/**
@ -233,9 +233,9 @@ class DocHelper {
* @param doc
* @throws IOException
*/
public static SegmentInfo writeDoc(Directory dir, Analyzer analyzer, Similarity similarity, Document doc) throws IOException {
public static SegmentInfo writeDoc(Directory dir, Analyzer analyzer, SimilarityProvider similarity, Document doc) throws IOException {
IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(
TEST_VERSION_CURRENT, analyzer).setSimilarity(similarity));
TEST_VERSION_CURRENT, analyzer).setSimilarityProvider(similarity));
//writer.setUseCompoundFile(false);
writer.addDocument(doc);
writer.commit();

View File

@ -38,12 +38,13 @@ import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.document.NumericField;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.search.DefaultSimilarity;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.search.SimilarityProvider;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
@ -412,7 +413,7 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
Term searchTerm = new Term("id", "6");
int delCount = reader.deleteDocuments(searchTerm);
assertEquals("wrong delete count", 1, delCount);
reader.setNorm(searcher.search(new TermQuery(new Term("id", "22")), 10).scoreDocs[0].doc, "content", Similarity.getDefault().encodeNormValue(2.0f));
reader.setNorm(searcher.search(new TermQuery(new Term("id", "22")), 10).scoreDocs[0].doc, "content", searcher.getSimilarityProvider().get("content").encodeNormValue(2.0f));
reader.close();
searcher.close();
@ -460,7 +461,7 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
Term searchTerm = new Term("id", "6");
int delCount = reader.deleteDocuments(searchTerm);
assertEquals("wrong delete count", 1, delCount);
reader.setNorm(22, "content", Similarity.getDefault().encodeNormValue(2.0f));
reader.setNorm(22, "content", searcher.getSimilarityProvider().get("content").encodeNormValue(2.0f));
reader.close();
// make sure they "took":
@ -519,7 +520,7 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
assertEquals("didn't delete the right number of documents", 1, delCount);
// Set one norm so we get a .s0 file:
reader.setNorm(21, "content", Similarity.getDefault().encodeNormValue(1.5f));
reader.setNorm(21, "content", conf.getSimilarityProvider().get("content").encodeNormValue(1.5f));
reader.close();
}
@ -556,7 +557,8 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
assertEquals("didn't delete the right number of documents", 1, delCount);
// Set one norm so we get a .s0 file:
reader.setNorm(21, "content", Similarity.getDefault().encodeNormValue(1.5f));
SimilarityProvider sim = new DefaultSimilarity();
reader.setNorm(21, "content", sim.get("content").encodeNormValue(1.5f));
reader.close();
// The numbering of fields can vary depending on which

View File

@ -30,7 +30,6 @@ import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
@ -608,7 +607,7 @@ public class TestDeletionPolicy extends LuceneTestCase {
writer.close();
IndexReader reader = IndexReader.open(dir, policy, false);
reader.deleteDocument(3*i+1);
reader.setNorm(4*i+1, "content", Similarity.getDefault().encodeNormValue(2.0F));
reader.setNorm(4*i+1, "content", conf.getSimilarityProvider().get("content").encodeNormValue(2.0F));
IndexSearcher searcher = new IndexSearcher(reader);
ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
assertEquals(16*(1+i), hits.length);
@ -716,7 +715,7 @@ public class TestDeletionPolicy extends LuceneTestCase {
writer.close();
IndexReader reader = IndexReader.open(dir, policy, false);
reader.deleteDocument(3);
reader.setNorm(5, "content", Similarity.getDefault().encodeNormValue(2.0F));
reader.setNorm(5, "content", conf.getSimilarityProvider().get("content").encodeNormValue(2.0F));
IndexSearcher searcher = new IndexSearcher(reader);
ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
assertEquals(16, hits.length);

View File

@ -18,6 +18,7 @@ package org.apache.lucene.index;
*/
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.search.DefaultSimilarity;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
@ -67,9 +68,9 @@ public class TestIndexFileDeleter extends LuceneTestCase {
Term searchTerm = new Term("id", "7");
int delCount = reader.deleteDocuments(searchTerm);
assertEquals("didn't delete the right number of documents", 1, delCount);
Similarity sim = new DefaultSimilarity().get("content");
// Set one norm so we get a .s0 file:
reader.setNorm(21, "content", Similarity.getDefault().encodeNormValue(1.5f));
reader.setNorm(21, "content", sim.encodeNormValue(1.5f));
reader.close();
// Now, artificially create an extra .del file & extra

View File

@ -39,11 +39,12 @@ import org.apache.lucene.document.SetBasedFieldSelector;
import org.apache.lucene.index.IndexReader.FieldOption;
import org.apache.lucene.index.codecs.CodecProvider;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.search.DefaultSimilarity;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.Directory;
@ -464,8 +465,9 @@ public class TestIndexReader extends LuceneTestCase
// expected
}
Similarity sim = new DefaultSimilarity().get("aaa");
try {
reader.setNorm(5, "aaa", Similarity.getDefault().encodeNormValue(2.0f));
reader.setNorm(5, "aaa", sim.encodeNormValue(2.0f));
fail("setNorm after close failed to throw IOException");
} catch (AlreadyClosedException e) {
// expected
@ -504,8 +506,9 @@ public class TestIndexReader extends LuceneTestCase
// expected
}
Similarity sim = new DefaultSimilarity().get("aaa");
try {
reader.setNorm(5, "aaa", Similarity.getDefault().encodeNormValue(2.0f));
reader.setNorm(5, "aaa", sim.encodeNormValue(2.0f));
fail("setNorm should have hit LockObtainFailedException");
} catch (LockObtainFailedException e) {
// expected
@ -535,7 +538,8 @@ public class TestIndexReader extends LuceneTestCase
// now open reader & set norm for doc 0
IndexReader reader = IndexReader.open(dir, false);
reader.setNorm(0, "content", Similarity.getDefault().encodeNormValue(2.0f));
Similarity sim = new DefaultSimilarity().get("content");
reader.setNorm(0, "content", sim.encodeNormValue(2.0f));
// we should be holding the write lock now:
assertTrue("locked", IndexWriter.isLocked(dir));
@ -549,7 +553,7 @@ public class TestIndexReader extends LuceneTestCase
IndexReader reader2 = IndexReader.open(dir, false);
// set norm again for doc 0
reader.setNorm(0, "content", Similarity.getDefault().encodeNormValue(3.0f));
reader.setNorm(0, "content", sim.encodeNormValue(3.0f));
assertTrue("locked", IndexWriter.isLocked(dir));
reader.close();
@ -579,15 +583,16 @@ public class TestIndexReader extends LuceneTestCase
addDoc(writer, searchTerm.text());
writer.close();
Similarity sim = new DefaultSimilarity().get("content");
// now open reader & set norm for doc 0 (writes to
// _0_1.s0)
reader = IndexReader.open(dir, false);
reader.setNorm(0, "content", Similarity.getDefault().encodeNormValue(2.0f));
reader.setNorm(0, "content", sim.encodeNormValue(2.0f));
reader.close();
// now open reader again & set norm for doc 0 (writes to _0_2.s0)
reader = IndexReader.open(dir, false);
reader.setNorm(0, "content", Similarity.getDefault().encodeNormValue(2.0f));
reader.setNorm(0, "content", sim.encodeNormValue(2.0f));
reader.close();
assertFalse("failed to remove first generation norms file on writing second generation",
dir.fileExists("_0_1.s0"));
@ -966,13 +971,13 @@ public class TestIndexReader extends LuceneTestCase
dir.setMaxSizeInBytes(thisDiskFree);
dir.setRandomIOExceptionRate(rate);
Similarity sim = new DefaultSimilarity().get("content");
try {
if (0 == x) {
int docId = 12;
for(int i=0;i<13;i++) {
reader.deleteDocument(docId);
reader.setNorm(docId, "content", Similarity.getDefault().encodeNormValue(2.0f));
reader.setNorm(docId, "content", sim.encodeNormValue(2.0f));
docId += 12;
}
}
@ -1130,8 +1135,9 @@ public class TestIndexReader extends LuceneTestCase
}
reader = IndexReader.open(dir, false);
Similarity sim = new DefaultSimilarity().get("content");
try {
reader.setNorm(1, "content", Similarity.getDefault().encodeNormValue(2.0f));
reader.setNorm(1, "content", sim.encodeNormValue(2.0f));
fail("did not hit exception when calling setNorm on an invalid doc number");
} catch (ArrayIndexOutOfBoundsException e) {
// expected

View File

@ -18,6 +18,7 @@ package org.apache.lucene.index;
*/
import org.apache.lucene.index.SegmentReader.Norm;
import org.apache.lucene.search.DefaultSimilarity;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
@ -272,13 +273,14 @@ public class TestIndexReaderClone extends LuceneTestCase {
* @throws Exception
*/
private void performDefaultTests(IndexReader r1) throws Exception {
float norm1 = Similarity.getDefault().decodeNormValue(MultiNorms.norms(r1, "field1")[4]);
Similarity sim = new DefaultSimilarity().get("field1");
float norm1 = sim.decodeNormValue(MultiNorms.norms(r1, "field1")[4]);
IndexReader pr1Clone = (IndexReader) r1.clone();
pr1Clone.deleteDocument(10);
pr1Clone.setNorm(4, "field1", Similarity.getDefault().encodeNormValue(0.5f));
assertTrue(Similarity.getDefault().decodeNormValue(MultiNorms.norms(r1, "field1")[4]) == norm1);
assertTrue(Similarity.getDefault().decodeNormValue(MultiNorms.norms(pr1Clone, "field1")[4]) != norm1);
pr1Clone.setNorm(4, "field1", sim.encodeNormValue(0.5f));
assertTrue(sim.decodeNormValue(MultiNorms.norms(r1, "field1")[4]) == norm1);
assertTrue(sim.decodeNormValue(MultiNorms.norms(pr1Clone, "field1")[4]) != norm1);
final Bits delDocs = MultiFields.getDeletedDocs(r1);
assertTrue(delDocs == null || !delDocs.get(10));
@ -327,7 +329,8 @@ public class TestIndexReaderClone extends LuceneTestCase {
TestIndexReaderReopen.createIndex(random, dir1, false);
SegmentReader origSegmentReader = getOnlySegmentReader(IndexReader.open(dir1, false));
origSegmentReader.deleteDocument(1);
origSegmentReader.setNorm(4, "field1", Similarity.getDefault().encodeNormValue(0.5f));
Similarity sim = new DefaultSimilarity().get("field1");
origSegmentReader.setNorm(4, "field1", sim.encodeNormValue(0.5f));
SegmentReader clonedSegmentReader = (SegmentReader) origSegmentReader
.clone();
@ -426,8 +429,9 @@ public class TestIndexReaderClone extends LuceneTestCase {
final Directory dir1 = newDirectory();
TestIndexReaderReopen.createIndex(random, dir1, false);
IndexReader orig = IndexReader.open(dir1, false);
orig.setNorm(1, "field1", Similarity.getDefault().encodeNormValue(17.0f));
final byte encoded = Similarity.getDefault().encodeNormValue(17.0f);
Similarity sim = new DefaultSimilarity().get("field1");
orig.setNorm(1, "field1", sim.encodeNormValue(17.0f));
final byte encoded = sim.encodeNormValue(17.0f);
assertEquals(encoded, MultiNorms.norms(orig, "field1")[1]);
// the cloned segmentreader should have 2 references, 1 to itself, and 1 to

View File

@ -32,6 +32,7 @@ import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.SegmentReader.Norm;
import org.apache.lucene.search.DefaultSimilarity;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.search.SimilarityProvider;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
@ -50,7 +51,7 @@ public class TestIndexReaderCloneNorms extends LuceneTestCase {
private static final int NUM_FIELDS = 10;
private Similarity similarityOne;
private SimilarityProvider similarityOne;
private Analyzer anlzr;
@ -203,19 +204,20 @@ public class TestIndexReaderCloneNorms extends LuceneTestCase {
IndexReader reader4C = (IndexReader) reader3C.clone();
SegmentReader segmentReader4C = getOnlySegmentReader(reader4C);
assertEquals(4, reader3CCNorm.bytesRef().get());
reader4C.setNorm(5, "field1", Similarity.getDefault().encodeNormValue(0.33f));
Similarity sim = new DefaultSimilarity().get("field1");
reader4C.setNorm(5, "field1", sim.encodeNormValue(0.33f));
// generate a cannot update exception in reader1
try {
reader3C.setNorm(1, "field1", Similarity.getDefault().encodeNormValue(0.99f));
reader3C.setNorm(1, "field1", sim.encodeNormValue(0.99f));
fail("did not hit expected exception");
} catch (Exception ex) {
// expected
}
// norm values should be different
assertTrue(Similarity.getDefault().decodeNormValue(segmentReader3C.norms("field1")[5])
!= Similarity.getDefault().decodeNormValue(segmentReader4C.norms("field1")[5]));
assertTrue(sim.decodeNormValue(segmentReader3C.norms("field1")[5])
!= sim.decodeNormValue(segmentReader4C.norms("field1")[5]));
Norm reader4CCNorm = segmentReader4C.norms.get("field1");
assertEquals(3, reader3CCNorm.bytesRef().get());
assertEquals(1, reader4CCNorm.bytesRef().get());
@ -223,7 +225,7 @@ public class TestIndexReaderCloneNorms extends LuceneTestCase {
IndexReader reader5C = (IndexReader) reader4C.clone();
SegmentReader segmentReader5C = getOnlySegmentReader(reader5C);
Norm reader5CCNorm = segmentReader5C.norms.get("field1");
reader5C.setNorm(5, "field1", Similarity.getDefault().encodeNormValue(0.7f));
reader5C.setNorm(5, "field1", sim.encodeNormValue(0.7f));
assertEquals(1, reader5CCNorm.bytesRef().get());
reader5C.close();
@ -237,7 +239,7 @@ public class TestIndexReaderCloneNorms extends LuceneTestCase {
private void createIndex(Random random, Directory dir) throws IOException {
IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(
TEST_VERSION_CURRENT, anlzr).setOpenMode(OpenMode.CREATE)
.setMaxBufferedDocs(5).setSimilarity(similarityOne));
.setMaxBufferedDocs(5).setSimilarityProvider(similarityOne));
LogMergePolicy lmp = (LogMergePolicy) iw.getConfig().getMergePolicy();
lmp.setMergeFactor(3);
lmp.setUseCompoundFile(true);
@ -256,8 +258,9 @@ public class TestIndexReaderCloneNorms extends LuceneTestCase {
// System.out.println(" and: for "+k+" from "+newNorm+" to "+origNorm);
modifiedNorms.set(i, Float.valueOf(newNorm));
modifiedNorms.set(k, Float.valueOf(origNorm));
ir.setNorm(i, "f" + 1, Similarity.getDefault().encodeNormValue(newNorm));
ir.setNorm(k, "f" + 1, Similarity.getDefault().encodeNormValue(origNorm));
Similarity sim = new DefaultSimilarity().get("f" + 1);
ir.setNorm(i, "f" + 1, sim.encodeNormValue(newNorm));
ir.setNorm(k, "f" + 1, sim.encodeNormValue(origNorm));
// System.out.println("setNorm i: "+i);
// break;
}
@ -277,7 +280,8 @@ public class TestIndexReaderCloneNorms extends LuceneTestCase {
assertEquals("number of norms mismatches", numDocNorms, b.length);
ArrayList<Float> storedNorms = (i == 1 ? modifiedNorms : norms);
for (int j = 0; j < b.length; j++) {
float norm = Similarity.getDefault().decodeNormValue(b[j]);
Similarity sim = new DefaultSimilarity().get(field);
float norm = sim.decodeNormValue(b[j]);
float norm1 = storedNorms.get(j).floatValue();
assertEquals("stored norm value of " + field + " for doc " + j + " is "
+ norm + " - a mismatch!", norm, norm1, 0.000001);
@ -289,7 +293,7 @@ public class TestIndexReaderCloneNorms extends LuceneTestCase {
throws IOException {
IndexWriterConfig conf = newIndexWriterConfig(
TEST_VERSION_CURRENT, anlzr).setOpenMode(OpenMode.APPEND)
.setMaxBufferedDocs(5).setSimilarity(similarityOne);
.setMaxBufferedDocs(5).setSimilarityProvider(similarityOne);
LogMergePolicy lmp = (LogMergePolicy) conf.getMergePolicy();
lmp.setMergeFactor(3);
lmp.setUseCompoundFile(compound);
@ -303,7 +307,7 @@ public class TestIndexReaderCloneNorms extends LuceneTestCase {
// create the next document
private Document newDoc() {
Document d = new Document();
float boost = nextNorm();
float boost = nextNorm("anyfield"); // in this test the same similarity is used for all fields so it does not matter what field is passed
for (int i = 0; i < 10; i++) {
Field f = newField("f" + i, "v" + i, Store.NO, Index.NOT_ANALYZED);
f.setBoost(boost);
@ -313,11 +317,12 @@ public class TestIndexReaderCloneNorms extends LuceneTestCase {
}
// return unique norm values that are unchanged by encoding/decoding
private float nextNorm() {
private float nextNorm(String fname) {
float norm = lastNorm + normDelta;
Similarity sim = new DefaultSimilarity().get(fname);
do {
float norm1 = Similarity.getDefault().decodeNormValue(
Similarity.getDefault().encodeNormValue(norm));
float norm1 = sim.decodeNormValue(
sim.encodeNormValue(norm));
if (norm1 > lastNorm) {
// System.out.println(norm1+" > "+lastNorm);
norm = norm1;

View File

@ -35,9 +35,11 @@ import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.search.DefaultSimilarity;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.search.SimilarityProvider;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.Directory;
@ -615,8 +617,9 @@ public class TestIndexReaderReopen extends LuceneTestCase {
IndexReader reader2 = reader1.reopen();
modifier = IndexReader.open(dir1, false);
modifier.setNorm(1, "field1", Similarity.getDefault().encodeNormValue(50f));
modifier.setNorm(1, "field2", Similarity.getDefault().encodeNormValue(50f));
SimilarityProvider sim = new DefaultSimilarity();
modifier.setNorm(1, "field1", sim.get("field1").encodeNormValue(50f));
modifier.setNorm(1, "field2", sim.get("field2").encodeNormValue(50f));
modifier.close();
IndexReader reader3 = reader2.reopen();
@ -709,7 +712,8 @@ public class TestIndexReaderReopen extends LuceneTestCase {
protected void modifyIndex(int i) throws IOException {
if (i % 3 == 0) {
IndexReader modifier = IndexReader.open(dir, false);
modifier.setNorm(i, "field1", Similarity.getDefault().encodeNormValue(50f));
Similarity sim = new DefaultSimilarity().get("field1");
modifier.setNorm(i, "field1", sim.encodeNormValue(50f));
modifier.close();
} else if (i % 3 == 1) {
IndexReader modifier = IndexReader.open(dir, false);
@ -989,9 +993,10 @@ public class TestIndexReaderReopen extends LuceneTestCase {
}
case 1: {
IndexReader reader = IndexReader.open(dir, false);
reader.setNorm(4, "field1", Similarity.getDefault().encodeNormValue(123f));
reader.setNorm(44, "field2", Similarity.getDefault().encodeNormValue(222f));
reader.setNorm(44, "field4", Similarity.getDefault().encodeNormValue(22f));
SimilarityProvider sim = new DefaultSimilarity();
reader.setNorm(4, "field1", sim.get("field1").encodeNormValue(123f));
reader.setNorm(44, "field2", sim.get("field2").encodeNormValue(222f));
reader.setNorm(44, "field4", sim.get("field4").encodeNormValue(22f));
reader.close();
break;
}
@ -1012,8 +1017,9 @@ public class TestIndexReaderReopen extends LuceneTestCase {
}
case 4: {
IndexReader reader = IndexReader.open(dir, false);
reader.setNorm(5, "field1", Similarity.getDefault().encodeNormValue(123f));
reader.setNorm(55, "field2", Similarity.getDefault().encodeNormValue(222f));
SimilarityProvider sim = new DefaultSimilarity();
reader.setNorm(5, "field1", sim.get("field1").encodeNormValue(123f));
reader.setNorm(55, "field2", sim.get("field2").encodeNormValue(222f));
reader.close();
break;
}

View File

@ -27,7 +27,7 @@ import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.index.DocumentsWriter.IndexingChain;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.search.DefaultSimilarity;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.util.LuceneTestCase;
import org.junit.Test;
@ -55,7 +55,8 @@ public class TestIndexWriterConfig extends LuceneTestCase {
assertEquals(KeepOnlyLastCommitDeletionPolicy.class, conf.getIndexDeletionPolicy().getClass());
assertEquals(ConcurrentMergeScheduler.class, conf.getMergeScheduler().getClass());
assertEquals(OpenMode.CREATE_OR_APPEND, conf.getOpenMode());
assertTrue(Similarity.getDefault() == conf.getSimilarity());
// we don't need to assert this, it should be unspecified
assertTrue(IndexSearcher.getDefaultSimilarityProvider() == conf.getSimilarityProvider());
assertEquals(IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL, conf.getTermIndexInterval());
assertEquals(IndexWriterConfig.getDefaultWriteLockTimeout(), conf.getWriteLockTimeout());
assertEquals(IndexWriterConfig.WRITE_LOCK_TIMEOUT, IndexWriterConfig.getDefaultWriteLockTimeout());
@ -77,7 +78,7 @@ public class TestIndexWriterConfig extends LuceneTestCase {
getters.add("getMaxFieldLength");
getters.add("getMergeScheduler");
getters.add("getOpenMode");
getters.add("getSimilarity");
getters.add("getSimilarityProvider");
getters.add("getTermIndexInterval");
getters.add("getWriteLockTimeout");
getters.add("getDefaultWriteLockTimeout");
@ -173,12 +174,13 @@ public class TestIndexWriterConfig extends LuceneTestCase {
conf.setMergeScheduler(null);
assertEquals(ConcurrentMergeScheduler.class, conf.getMergeScheduler().getClass());
// Test Similarity
assertTrue(Similarity.getDefault() == conf.getSimilarity());
conf.setSimilarity(new MySimilarity());
assertEquals(MySimilarity.class, conf.getSimilarity().getClass());
conf.setSimilarity(null);
assertTrue(Similarity.getDefault() == conf.getSimilarity());
// Test Similarity:
// we shouldn't assert what the default is, just that it's not null.
assertTrue(IndexSearcher.getDefaultSimilarityProvider() == conf.getSimilarityProvider());
conf.setSimilarityProvider(new MySimilarity());
assertEquals(MySimilarity.class, conf.getSimilarityProvider().getClass());
conf.setSimilarityProvider(null);
assertTrue(IndexSearcher.getDefaultSimilarityProvider() == conf.getSimilarityProvider());
// Test IndexingChain
assertTrue(DocumentsWriter.defaultIndexingChain == conf.getIndexingChain());

View File

@ -46,7 +46,7 @@ public class TestMaxTermFrequency extends LuceneTestCase {
dir = newDirectory();
IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT,
new MockAnalyzer(MockTokenizer.SIMPLE, true));
config.setSimilarity(new TestSimilarity());
config.setSimilarityProvider(new TestSimilarity());
RandomIndexWriter writer = new RandomIndexWriter(random, dir, config);
Document doc = new Document();
Field foo = newField("foo", "", Field.Store.NO, Field.Index.ANALYZED);

View File

@ -30,6 +30,7 @@ import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.search.DefaultSimilarity;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.search.SimilarityProvider;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
@ -49,7 +50,7 @@ public class TestNorms extends LuceneTestCase {
private static final int NUM_FIELDS = 10;
private Similarity similarityOne;
private SimilarityProvider similarityOne;
private Analyzer anlzr;
private int numDocNorms;
private ArrayList<Float> norms;
@ -151,7 +152,7 @@ public class TestNorms extends LuceneTestCase {
private void createIndex(Random random, Directory dir) throws IOException {
IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(
TEST_VERSION_CURRENT, anlzr).setOpenMode(OpenMode.CREATE)
.setMaxBufferedDocs(5).setSimilarity(similarityOne));
.setMaxBufferedDocs(5).setSimilarityProvider(similarityOne));
LogMergePolicy lmp = (LogMergePolicy) iw.getConfig().getMergePolicy();
lmp.setMergeFactor(3);
lmp.setUseCompoundFile(true);
@ -169,8 +170,9 @@ public class TestNorms extends LuceneTestCase {
//System.out.println(" and: for "+k+" from "+newNorm+" to "+origNorm);
modifiedNorms.set(i, Float.valueOf(newNorm));
modifiedNorms.set(k, Float.valueOf(origNorm));
ir.setNorm(i, "f"+1, Similarity.getDefault().encodeNormValue(newNorm));
ir.setNorm(k, "f"+1, Similarity.getDefault().encodeNormValue(origNorm));
Similarity sim = new DefaultSimilarity().get("f"+1);
ir.setNorm(i, "f"+1, sim.encodeNormValue(newNorm));
ir.setNorm(k, "f"+1, sim.encodeNormValue(origNorm));
}
ir.close();
}
@ -184,7 +186,7 @@ public class TestNorms extends LuceneTestCase {
assertEquals("number of norms mismatches",numDocNorms,b.length);
ArrayList<Float> storedNorms = (i==1 ? modifiedNorms : norms);
for (int j = 0; j < b.length; j++) {
float norm = similarityOne.decodeNormValue(b[j]);
float norm = similarityOne.get(field).decodeNormValue(b[j]);
float norm1 = storedNorms.get(j).floatValue();
assertEquals("stored norm value of "+field+" for doc "+j+" is "+norm+" - a mismatch!", norm, norm1, 0.000001);
}
@ -195,7 +197,7 @@ public class TestNorms extends LuceneTestCase {
private void addDocs(Random random, Directory dir, int ndocs, boolean compound) throws IOException {
IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(
TEST_VERSION_CURRENT, anlzr).setOpenMode(OpenMode.APPEND)
.setMaxBufferedDocs(5).setSimilarity(similarityOne));
.setMaxBufferedDocs(5).setSimilarityProvider(similarityOne));
LogMergePolicy lmp = (LogMergePolicy) iw.getConfig().getMergePolicy();
lmp.setMergeFactor(3);
lmp.setUseCompoundFile(compound);
@ -208,7 +210,7 @@ public class TestNorms extends LuceneTestCase {
// create the next document
private Document newDoc() {
Document d = new Document();
float boost = nextNorm();
float boost = nextNorm("anyfield"); // this test uses the same similarity for all fields, so any field name will do
for (int i = 0; i < 10; i++) {
Field f = newField("f"+i,"v"+i,Store.NO,Index.NOT_ANALYZED);
f.setBoost(boost);
@ -218,10 +220,11 @@ public class TestNorms extends LuceneTestCase {
}
// return unique norm values that are unchanged by encoding/decoding
private float nextNorm() {
private float nextNorm(String fname) {
float norm = lastNorm + normDelta;
Similarity similarity = similarityOne.get(fname);
do {
float norm1 = similarityOne.decodeNormValue(similarityOne.encodeNormValue(norm));
float norm1 = similarity.decodeNormValue(similarity.encodeNormValue(norm));
if (norm1 > lastNorm) {
//System.out.println(norm1+" > "+lastNorm);
norm = norm1;
@ -258,7 +261,7 @@ public class TestNorms extends LuceneTestCase {
public void testCustomEncoder() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer());
config.setSimilarity(new CustomNormEncodingSimilarity());
config.setSimilarityProvider(new CustomNormEncodingSimilarity());
RandomIndexWriter writer = new RandomIndexWriter(random, dir, config);
Document doc = new Document();
Field foo = newField("foo", "", Field.Store.NO, Field.Index.ANALYZED);
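The setNorm changes above make the round trip explicit: a norm byte is encoded and decoded by the Similarity the provider returns for that field, not by a JVM-wide default. Sketched with DefaultSimilarity (which doubles as a SimilarityProvider on this trunk):

Similarity sim = new DefaultSimilarity().get("f1"); // field-specific Similarity, as in the test above
byte encoded = sim.encodeNormValue(2.0f);           // the byte that lands in the norms file
float decoded = sim.decodeNormValue(encoded);       // lossy: close to, but not exactly, 2.0f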

View File

@ -35,13 +35,11 @@ import org.apache.lucene.search.Explanation.IDFExplanation;
public class TestOmitTf extends LuceneTestCase {
public static class SimpleSimilarity extends Similarity {
public static class SimpleSimilarity extends Similarity implements SimilarityProvider {
@Override public float computeNorm(String field, FieldInvertState state) { return state.getBoost(); }
@Override public float queryNorm(float sumOfSquaredWeights) { return 1.0f; }
@Override public float tf(float freq) { return freq; }
@Override public float sloppyFreq(int distance) { return 2.0f; }
@Override public float idf(int docFreq, int numDocs) { return 1.0f; }
@Override public float coord(int overlap, int maxOverlap) { return 1.0f; }
@Override public IDFExplanation idfExplain(Collection<Term> terms, IndexSearcher searcher) throws IOException {
return new IDFExplanation() {
@Override
@ -54,6 +52,11 @@ public class TestOmitTf extends LuceneTestCase {
}
};
}
public float queryNorm(float sumOfSquaredWeights) { return 1.0f; }
public float coord(int overlap, int maxOverlap) { return 1.0f; }
public Similarity get(String field) {
return this;
}
}
// Tests whether the DocumentWriter correctly enables the
@ -251,7 +254,7 @@ public class TestOmitTf extends LuceneTestCase {
dir,
newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).
setMaxBufferedDocs(2).
setSimilarity(new SimpleSimilarity()).
setSimilarityProvider(new SimpleSimilarity()).
setMergePolicy(newLogMergePolicy(2))
);
@ -281,7 +284,7 @@ public class TestOmitTf extends LuceneTestCase {
* Verify the index
*/
IndexSearcher searcher = new IndexSearcher(dir, true);
searcher.setSimilarity(new SimpleSimilarity());
searcher.setSimilarityProvider(new SimpleSimilarity());
Term a = new Term("noTf", term);
Term b = new Term("tf", term);
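SimpleSimilarity shows the lightest-weight migration for code that previously overrode coord() and queryNorm() directly on Similarity: keep one class that extends Similarity for the field-specific methods, implements SimilarityProvider for the query-level ones, and returns itself from get() for every field. The pattern distilled (class name hypothetical, assuming the inherited idfExplain default suffices):

import org.apache.lucene.index.FieldInvertState;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.search.SimilarityProvider;

public class UniformSimilarity extends Similarity implements SimilarityProvider {
  // field-specific relevance methods stay on Similarity
  @Override public float computeNorm(String field, FieldInvertState state) { return state.getBoost(); }
  @Override public float tf(float freq) { return freq; }
  @Override public float sloppyFreq(int distance) { return 1.0f; }
  @Override public float idf(int docFreq, int numDocs) { return 1.0f; }

  // query-level methods now live on SimilarityProvider
  public float queryNorm(float sumOfSquaredWeights) { return 1.0f; }
  public float coord(int overlap, int maxOverlap) { return 1.0f; }
  public Similarity get(String field) { return this; } // one scoring model for all fields
}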

View File

@ -147,7 +147,8 @@ public class TestParallelReader extends LuceneTestCase {
assertTrue(pr.isCurrent());
IndexReader modifier = IndexReader.open(dir1, false);
modifier.setNorm(0, "f1", Similarity.getDefault().encodeNormValue(100f));
SimilarityProvider sim = new DefaultSimilarity();
modifier.setNorm(0, "f1", sim.get("f1").encodeNormValue(100f));
modifier.close();
// one of the two IndexReaders which ParallelReader is using
@ -155,7 +156,7 @@ public class TestParallelReader extends LuceneTestCase {
assertFalse(pr.isCurrent());
modifier = IndexReader.open(dir2, false);
modifier.setNorm(0, "f3", Similarity.getDefault().encodeNormValue(100f));
modifier.setNorm(0, "f3", sim.get("f3").encodeNormValue(100f));
modifier.close();
// now both are not current anymore

View File

@ -242,11 +242,6 @@ final class JustCompileSearch {
static final class JustCompileSimilarity extends Similarity {
@Override
public float coord(int overlap, int maxOverlap) {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
}
@Override
public float idf(int docFreq, int numDocs) {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
@ -257,11 +252,6 @@ final class JustCompileSearch {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
}
@Override
public float queryNorm(float sumOfSquaredWeights) {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
}
@Override
public float sloppyFreq(int distance) {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
@ -270,8 +260,22 @@ final class JustCompileSearch {
@Override
public float tf(float freq) {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
}
}
static final class JustCompileSimilarityProvider implements SimilarityProvider {
public float queryNorm(float sumOfSquaredWeights) {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
}
public float coord(int overlap, int maxOverlap) {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
}
public Similarity get(String field) {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
}
}
static final class JustCompileSpanFilter extends SpanFilter {

View File

@ -158,7 +158,7 @@ public class QueryUtils {
0 < edge ? r : IndexReader.open(makeEmptyIndex(random, 0), true))
};
IndexSearcher out = new IndexSearcher(new MultiReader(readers));
out.setSimilarity(s.getSimilarity());
out.setSimilarityProvider(s.getSimilarityProvider());
return out;
}

View File

@ -208,9 +208,9 @@ public class TestBoolean2 extends LuceneTestCase {
public void testQueries10() throws Exception {
String queryText = "+w3 +xx +w2 zz";
int[] expDocNrs = {2, 3};
Similarity oldSimilarity = searcher.getSimilarity();
SimilarityProvider oldSimilarity = searcher.getSimilarityProvider();
try {
searcher.setSimilarity(new DefaultSimilarity(){
searcher.setSimilarityProvider(new DefaultSimilarity(){
@Override
public float coord(int overlap, int maxOverlap) {
return overlap / ((float)maxOverlap - 1);
@ -218,7 +218,7 @@ public class TestBoolean2 extends LuceneTestCase {
});
queriesTest(queryText, expDocNrs);
} finally {
searcher.setSimilarity(oldSimilarity);
searcher.setSimilarityProvider(oldSimilarity);
}
}

View File

@ -34,7 +34,7 @@ public class TestComplexExplanations extends TestExplanations {
@Override
public void setUp() throws Exception {
super.setUp();
searcher.setSimilarity(createQnorm1Similarity());
searcher.setSimilarityProvider(createQnorm1Similarity());
}
// must be static for weight serialization tests

View File

@ -97,7 +97,7 @@ public class TestConstantScoreQuery extends LuceneTestCase {
searcher = new IndexSearcher(reader);
// set a similarity that does not normalize our boost away
searcher.setSimilarity(new DefaultSimilarity() {
searcher.setSimilarityProvider(new DefaultSimilarity() {
@Override
public float queryNorm(float sumOfSquaredWeights) {
return 1.0f;

View File

@ -73,7 +73,7 @@ public class TestDisjunctionMaxQuery extends LuceneTestCase {
}
}
public Similarity sim = new TestSimilarity();
public SimilarityProvider sim = new TestSimilarity();
public Directory index;
public IndexReader r;
public IndexSearcher s;
@ -85,7 +85,7 @@ public class TestDisjunctionMaxQuery extends LuceneTestCase {
index = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random, index,
newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer())
.setSimilarity(sim));
.setSimilarityProvider(sim));
// hed is the most important field, dek is secondary
@ -150,7 +150,7 @@ public class TestDisjunctionMaxQuery extends LuceneTestCase {
r = new SlowMultiReaderWrapper(writer.getReader());
writer.close();
s = new IndexSearcher(r);
s.setSimilarity(sim);
s.setSimilarityProvider(sim);
}
@Override

View File

@ -69,7 +69,7 @@ public class TestMatchAllDocsQuery extends LuceneTestCase {
assertEquals("one", ir.document(hits[2].doc).get("key"));
// change norm & retest
ir.setNorm(0, "key", Similarity.getDefault().encodeNormValue(400f));
ir.setNorm(0, "key", is.getSimilarityProvider().get("key").encodeNormValue(400f));
normsQuery = new MatchAllDocsQuery("key");
hits = is.search(normsQuery, null, 1000).scoreDocs;
assertEquals(3, hits.length);

View File

@ -295,7 +295,7 @@ public class TestMultiPhraseQuery extends LuceneTestCase {
IndexReader reader = writer.getReader();
IndexSearcher searcher = new IndexSearcher(reader);
searcher.setSimilarity(new DefaultSimilarity() {
searcher.setSimilarityProvider(new DefaultSimilarity() {
@Override
public IDFExplanation idfExplain(Collection<Term> terms,

View File

@ -51,10 +51,11 @@ public class TestSetNorm extends LuceneTestCase {
// reset the boost of each instance of this document
IndexReader reader = IndexReader.open(store, false);
reader.setNorm(0, "field", Similarity.getDefault().encodeNormValue(1.0f));
reader.setNorm(1, "field", Similarity.getDefault().encodeNormValue(2.0f));
reader.setNorm(2, "field", Similarity.getDefault().encodeNormValue(4.0f));
reader.setNorm(3, "field", Similarity.getDefault().encodeNormValue(16.0f));
Similarity similarity = new DefaultSimilarity().get("field");
reader.setNorm(0, "field", similarity.encodeNormValue(1.0f));
reader.setNorm(1, "field", similarity.encodeNormValue(2.0f));
reader.setNorm(2, "field", similarity.encodeNormValue(4.0f));
reader.setNorm(3, "field", similarity.encodeNormValue(16.0f));
reader.close();
// check that searches are ordered by this boost

View File

@ -39,13 +39,11 @@ import org.apache.lucene.search.Explanation.IDFExplanation;
*/
public class TestSimilarity extends LuceneTestCase {
public static class SimpleSimilarity extends Similarity {
public static class SimpleSimilarity extends Similarity implements SimilarityProvider {
@Override public float computeNorm(String field, FieldInvertState state) { return state.getBoost(); }
@Override public float queryNorm(float sumOfSquaredWeights) { return 1.0f; }
@Override public float tf(float freq) { return freq; }
@Override public float sloppyFreq(int distance) { return 2.0f; }
@Override public float idf(int docFreq, int numDocs) { return 1.0f; }
@Override public float coord(int overlap, int maxOverlap) { return 1.0f; }
@Override public IDFExplanation idfExplain(Collection<Term> terms, IndexSearcher searcher) throws IOException {
return new IDFExplanation() {
@Override
@ -58,13 +56,18 @@ public class TestSimilarity extends LuceneTestCase {
}
};
}
public float queryNorm(float sumOfSquaredWeights) { return 1.0f; }
public float coord(int overlap, int maxOverlap) { return 1.0f; }
public Similarity get(String field) {
return this;
}
}
public void testSimilarity() throws Exception {
Directory store = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random, store,
newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer())
.setSimilarity(new SimpleSimilarity()));
.setSimilarityProvider(new SimpleSimilarity()));
Document d1 = new Document();
d1.add(newField("field", "a c", Field.Store.YES, Field.Index.ANALYZED));
@ -78,7 +81,7 @@ public class TestSimilarity extends LuceneTestCase {
writer.close();
IndexSearcher searcher = new IndexSearcher(reader);
searcher.setSimilarity(new SimpleSimilarity());
searcher.setSimilarityProvider(new SimpleSimilarity());
Term a = new Term("field", "a");
Term b = new Term("field", "b");

View File

@ -0,0 +1,151 @@
package org.apache.lucene.search;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.FieldInvertState;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.MultiNorms;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
public class TestSimilarityProvider extends LuceneTestCase {
private Directory directory;
private IndexReader reader;
private IndexSearcher searcher;
@Override
public void setUp() throws Exception {
super.setUp();
directory = newDirectory();
SimilarityProvider sim = new ExampleSimilarityProvider();
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT,
new MockAnalyzer()).setSimilarityProvider(sim);
RandomIndexWriter iw = new RandomIndexWriter(random, directory, iwc);
Document doc = new Document();
Field field = newField("foo", "", Field.Store.NO, Field.Index.ANALYZED);
doc.add(field);
Field field2 = newField("bar", "", Field.Store.NO, Field.Index.ANALYZED);
doc.add(field2);
field.setValue("quick brown fox");
field2.setValue("quick brown fox");
iw.addDocument(doc);
field.setValue("jumps over lazy brown dog");
field2.setValue("jumps over lazy brown dog");
iw.addDocument(doc);
reader = iw.getReader();
iw.close();
searcher = new IndexSearcher(reader);
searcher.setSimilarityProvider(sim);
}
@Override
public void tearDown() throws Exception {
searcher.close();
reader.close();
directory.close();
super.tearDown();
}
public void testBasics() throws Exception {
// sanity check of norms writer
byte fooNorms[] = MultiNorms.norms(reader, "foo");
byte barNorms[] = MultiNorms.norms(reader, "bar");
for (int i = 0; i < fooNorms.length; i++) {
assertFalse(fooNorms[i] == barNorms[i]);
}
// sanity check of searching
TopDocs foodocs = searcher.search(new TermQuery(new Term("foo", "brown")), 10);
assertTrue(foodocs.totalHits > 0);
TopDocs bardocs = searcher.search(new TermQuery(new Term("bar", "brown")), 10);
assertTrue(bardocs.totalHits > 0);
assertTrue(foodocs.scoreDocs[0].score < bardocs.scoreDocs[0].score);
}
private class ExampleSimilarityProvider implements SimilarityProvider {
private Similarity sim1 = new Sim1();
private Similarity sim2 = new Sim2();
public float coord(int overlap, int maxOverlap) {
return 1f;
}
public float queryNorm(float sumOfSquaredWeights) {
return 1f;
}
public Similarity get(String field) {
if (field.equals("foo")) {
return sim1;
} else {
return sim2;
}
}
}
private class Sim1 extends Similarity {
@Override
public float computeNorm(String field, FieldInvertState state) {
return 1f;
}
@Override
public float sloppyFreq(int distance) {
return 1f;
}
@Override
public float tf(float freq) {
return 1f;
}
@Override
public float idf(int docFreq, int numDocs) {
return 1f;
}
}
private class Sim2 extends Similarity {
@Override
public float computeNorm(String field, FieldInvertState state) {
return 10f;
}
@Override
public float sloppyFreq(int distance) {
return 10f;
}
@Override
public float tf(float freq) {
return 10f;
}
@Override
public float idf(int docFreq, int numDocs) {
return 10f;
}
}
}

View File

@ -27,7 +27,7 @@ import org.apache.lucene.document.Field;
import org.apache.lucene.util.English;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.search.SimilarityProvider;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.MockDirectoryWrapper;
import org.apache.lucene.store.RAMDirectory;
@ -111,13 +111,13 @@ public class PayloadHelper {
* @throws IOException
*/
// TODO: randomize
public IndexSearcher setUp(Random random, Similarity similarity, int numDocs) throws IOException {
public IndexSearcher setUp(Random random, SimilarityProvider similarity, int numDocs) throws IOException {
Directory directory = new MockDirectoryWrapper(random, new RAMDirectory());
PayloadAnalyzer analyzer = new PayloadAnalyzer();
// TODO randomize this
IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(
TEST_VERSION_CURRENT, analyzer).setSimilarity(similarity));
TEST_VERSION_CURRENT, analyzer).setSimilarityProvider(similarity));
// writer.infoStream = System.out;
for (int i = 0; i < numDocs; i++) {
Document doc = new Document();
@ -130,7 +130,7 @@ public class PayloadHelper {
writer.close();
IndexSearcher searcher = new IndexSearcher(reader);
searcher.setSimilarity(similarity);
searcher.setSimilarityProvider(similarity);
return searcher;
}

View File

@ -105,7 +105,7 @@ public class TestPayloadNearQuery extends LuceneTestCase {
directory = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random, directory,
newIndexWriterConfig(TEST_VERSION_CURRENT, new PayloadAnalyzer())
.setSimilarity(similarity));
.setSimilarityProvider(similarity));
//writer.infoStream = System.out;
for (int i = 0; i < 1000; i++) {
Document doc = new Document();
@ -118,7 +118,7 @@ public class TestPayloadNearQuery extends LuceneTestCase {
writer.close();
searcher = new IndexSearcher(reader);
searcher.setSimilarity(similarity);
searcher.setSimilarityProvider(similarity);
}
@Override

View File

@ -110,7 +110,7 @@ public class TestPayloadTermQuery extends LuceneTestCase {
directory = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random, directory,
newIndexWriterConfig(TEST_VERSION_CURRENT, new PayloadAnalyzer())
.setSimilarity(similarity));
.setSimilarityProvider(similarity));
//writer.infoStream = System.out;
for (int i = 0; i < 1000; i++) {
Document doc = new Document();
@ -125,7 +125,7 @@ public class TestPayloadTermQuery extends LuceneTestCase {
writer.close();
searcher = new IndexSearcher(reader);
searcher.setSimilarity(similarity);
searcher.setSimilarityProvider(similarity);
}
@Override
@ -220,7 +220,7 @@ public class TestPayloadTermQuery extends LuceneTestCase {
new MaxPayloadFunction(), false);
IndexSearcher theSearcher = new IndexSearcher(directory, true);
theSearcher.setSimilarity(new FullSimilarity());
theSearcher.setSimilarityProvider(new FullSimilarity());
TopDocs hits = searcher.search(query, null, 100);
assertTrue("hits is null and it shouldn't be", hits != null);
assertTrue("hits Size: " + hits.totalHits + " is not: " + 100, hits.totalHits == 100);

View File

@ -39,7 +39,7 @@ import org.apache.lucene.index.Payload;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.DefaultSimilarity;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.search.SimilarityProvider;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.payloads.PayloadHelper;
@ -50,7 +50,7 @@ import org.apache.lucene.util.LuceneTestCase;
public class TestPayloadSpans extends LuceneTestCase {
private IndexSearcher searcher;
private Similarity similarity = new DefaultSimilarity();
private SimilarityProvider similarity = new DefaultSimilarity();
protected IndexReader indexReader;
private IndexReader closeIndexReader;
private Directory directory;
@ -110,7 +110,7 @@ public class TestPayloadSpans extends LuceneTestCase {
Directory directory = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random, directory,
newIndexWriterConfig(TEST_VERSION_CURRENT, new PayloadAnalyzer()).setSimilarity(similarity));
newIndexWriterConfig(TEST_VERSION_CURRENT, new PayloadAnalyzer()).setSimilarityProvider(similarity));
Document doc = new Document();
doc.add(newField(PayloadHelper.FIELD, "one two three one four three",
@ -370,7 +370,7 @@ public class TestPayloadSpans extends LuceneTestCase {
public void testPayloadSpanUtil() throws Exception {
Directory directory = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random, directory,
newIndexWriterConfig(TEST_VERSION_CURRENT, new PayloadAnalyzer()).setSimilarity(similarity));
newIndexWriterConfig(TEST_VERSION_CURRENT, new PayloadAnalyzer()).setSimilarityProvider(similarity));
Document doc = new Document();
doc.add(newField(PayloadHelper.FIELD,"xx rr yy mm pp", Field.Store.YES, Field.Index.ANALYZED));
@ -430,7 +430,7 @@ public class TestPayloadSpans extends LuceneTestCase {
directory = newDirectory();
String[] docs = new String[]{"xx rr yy mm pp","xx yy mm rr pp", "nopayload qq ss pp np", "one two three four five six seven eight nine ten eleven", "nine one two three four five six seven eight eleven ten"};
RandomIndexWriter writer = new RandomIndexWriter(random, directory,
newIndexWriterConfig(TEST_VERSION_CURRENT, new PayloadAnalyzer()).setSimilarity(similarity));
newIndexWriterConfig(TEST_VERSION_CURRENT, new PayloadAnalyzer()).setSimilarityProvider(similarity));
Document doc = null;
for(int i = 0; i < docs.length; i++) {

View File

@ -20,9 +20,9 @@ package org.apache.lucene.search.spans;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.CheckHits;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.search.DefaultSimilarity;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.SimilarityProvider;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Weight.ScorerContext;
@ -410,17 +410,17 @@ public class TestSpans extends LuceneTestCase {
for (int i = 0; i < leaves.length; i++) {
final Similarity sim = new DefaultSimilarity() {
final SimilarityProvider sim = new DefaultSimilarity() {
@Override
public float sloppyFreq(int distance) {
return 0.0f;
}
};
final Similarity oldSim = searcher.getSimilarity();
final SimilarityProvider oldSim = searcher.getSimilarityProvider();
Scorer spanScorer;
try {
searcher.setSimilarity(sim);
searcher.setSimilarityProvider(sim);
SpanNearQuery snq = new SpanNearQuery(
new SpanQuery[] {
makeSpanTermQuery("t1"),
@ -430,7 +430,7 @@ public class TestSpans extends LuceneTestCase {
spanScorer = snq.weight(searcher).scorer(leaves[i], ScorerContext.def());
} finally {
searcher.setSimilarity(oldSim);
searcher.setSimilarityProvider(oldSim);
}
if (i == subIndex) {
assertTrue("first doc", spanScorer.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);

View File

@ -20,7 +20,8 @@ package org.apache.solr.schema;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.SimilarityProvider;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.util.Version;
import org.apache.solr.common.ResourceLoader;
@ -192,7 +193,7 @@ public final class IndexSchema {
/**
* Returns the SimilarityProvider used for this index
*/
public Similarity getSimilarity() { return similarityFactory.getSimilarity(); }
public SimilarityProvider getSimilarityProvider() { return similarityFactory.getSimilarityProvider(); }
/**
* Returns the SimilarityFactory used for this index
@ -496,8 +497,8 @@ public final class IndexSchema {
Node node = (Node) xpath.evaluate("/schema/similarity", document, XPathConstants.NODE);
if (node==null) {
similarityFactory = new SimilarityFactory() {
public Similarity getSimilarity() {
return Similarity.getDefault();
public SimilarityProvider getSimilarityProvider() {
return IndexSearcher.getDefaultSimilarityProvider();
}
};
log.debug("using default similarity");
@ -509,10 +510,10 @@ public final class IndexSchema {
similarityFactory = (SimilarityFactory)obj;
similarityFactory.init(params);
} else {
// just like always, assume it's a Similarlity and get a ClassCastException - reasonable error handling
// just like always, assume it's a SimilarityProvider and get a ClassCastException - reasonable error handling
similarityFactory = new SimilarityFactory() {
public Similarity getSimilarity() {
return (Similarity) obj;
public SimilarityProvider getSimilarityProvider() {
return (SimilarityProvider) obj;
}
};
}

View File

@ -16,7 +16,7 @@ package org.apache.solr.schema;
* limitations under the License.
*/
import org.apache.lucene.search.Similarity;
import org.apache.lucene.search.SimilarityProvider;
import org.apache.solr.common.params.SolrParams;
public abstract class SimilarityFactory {
@ -25,5 +25,5 @@ public abstract class SimilarityFactory {
public void init(SolrParams params) { this.params = params; }
public SolrParams getParams() { return params; }
public abstract Similarity getSimilarity();
public abstract SimilarityProvider getSimilarityProvider();
}
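Custom Solr factories migrate by implementing the renamed abstract method. A hedged sketch — the factory class name is invented, and it leans on DefaultSimilarity also being a SimilarityProvider (as TestPayloadSpans above relies on):

import org.apache.lucene.search.DefaultSimilarity;
import org.apache.lucene.search.SimilarityProvider;

public class StockSimilarityFactory extends SimilarityFactory {
  @Override
  public SimilarityProvider getSimilarityProvider() {
    // params, populated by init(SolrParams), is available here for configuration
    return new DefaultSimilarity();
  }
}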

View File

@ -55,13 +55,11 @@ public class SolrConstantScoreQuery extends ConstantScoreQuery {
}
protected class ConstantWeight extends Weight {
private Similarity similarity;
private float queryNorm;
private float queryWeight;
private Map context;
public ConstantWeight(IndexSearcher searcher) throws IOException {
this.similarity = searcher.getSimilarity();
this.context = ValueSource.newContext(searcher);
if (filter instanceof SolrFilter)
((SolrFilter)filter).createWeight(context, searcher);

View File

@ -132,7 +132,7 @@ public class SolrIndexSearcher extends IndexSearcher implements SolrInfoMBean {
}
this.closeReader = closeReader;
setSimilarity(schema.getSimilarity());
setSimilarityProvider(schema.getSimilarityProvider());
SolrConfig solrConfig = core.getSolrConfig();
queryResultWindowSize = solrConfig.queryResultWindowSize;

View File

@ -41,7 +41,7 @@ public class IDFValueSource extends DocFreqValueSource {
@Override
public DocValues getValues(Map context, AtomicReaderContext readerContext) throws IOException {
IndexSearcher searcher = (IndexSearcher)context.get("searcher");
Similarity sim = searcher.getSimilarity();
Similarity sim = searcher.getSimilarityProvider().get(field);
// todo: we need docFreq that takes a BytesRef
String strVal = ByteUtils.UTF8toUTF16(indexedBytes);
int docfreq = searcher.docFreq(new Term(indexedField, strVal));
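The Solr value sources now resolve scoring factors the same way the rest of the search path does: ask the searcher's provider for the field's Similarity instead of reaching for a static default. The lookup, sketched with a made-up field and term:

SimilarityProvider provider = searcher.getSimilarityProvider();
Similarity fieldSim = provider.get("myfield");                  // hypothetical field
int docFreq = searcher.docFreq(new Term("myfield", "lucene"));  // hypothetical term
float idf = fieldSim.idf(docFreq, searcher.getIndexReader().maxDoc());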

View File

@ -46,7 +46,7 @@ public class NormValueSource extends ValueSource {
@Override
public DocValues getValues(Map context, AtomicReaderContext readerContext) throws IOException {
IndexSearcher searcher = (IndexSearcher)context.get("searcher");
final Similarity similarity = searcher.getSimilarity();
final Similarity similarity = searcher.getSimilarityProvider().get(field);
final byte[] norms = readerContext.reader.norms(field);
if (norms == null) {
return new ConstDoubleDocValues(0.0, this);

View File

@ -25,7 +25,7 @@ public class TFValueSource extends TermFreqValueSource {
public DocValues getValues(Map context, AtomicReaderContext readerContext) throws IOException {
Fields fields = readerContext.reader.fields();
final Terms terms = fields.terms(field);
final Similarity similarity = ((IndexSearcher)context.get("searcher")).getSimilarity();
final Similarity similarity = ((IndexSearcher)context.get("searcher")).getSimilarityProvider().get(field);
return new FloatDocValues(this) {
DocsEnum docs;

View File

@ -153,7 +153,7 @@ public class SolrIndexConfig {
if (writeLockTimeout != -1)
iwc.setWriteLockTimeout(writeLockTimeout);
iwc.setSimilarity(schema.getSimilarity());
iwc.setSimilarityProvider(schema.getSimilarityProvider());
iwc.setMergePolicy(buildMergePolicy(schema));
iwc.setMergeScheduler(buildMergeScheduler(schema));

View File

@ -16,10 +16,10 @@
*/
package org.apache.solr.schema;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.search.SimilarityProvider;
public class CustomSimilarityFactory extends SimilarityFactory {
public Similarity getSimilarity() {
public SimilarityProvider getSimilarityProvider() {
return new MockConfigurableSimilarity(params.get("echo"));
}
}

View File

@ -27,7 +27,7 @@ import org.apache.solr.common.params.MapSolrParams;
import org.apache.solr.core.SolrCore;
import org.apache.solr.request.LocalSolrQueryRequest;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.search.SimilarityProvider;
import org.junit.BeforeClass;
import org.junit.Test;
@ -83,7 +83,7 @@ public class IndexSchemaTest extends SolrTestCaseJ4 {
@Test
public void testSimilarityFactory() {
SolrCore core = h.getCore();
Similarity similarity = core.getSchema().getSimilarity();
SimilarityProvider similarity = core.getSchema().getSimilarityProvider();
assertTrue("wrong class", similarity instanceof MockConfigurableSimilarity);
assertEquals("is there an echo?", ((MockConfigurableSimilarity)similarity).getPassthrough());
}