mirror of https://github.com/apache/lucene.git
LUCENE-5911: Remove cacheing of norms, calculate up front in freeze()
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1633321 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
fc3b1b07da
commit
2e1c562b4b
|
@ -71,7 +71,7 @@ New Features
|
|||
Robert Muir)
|
||||
|
||||
* LUCENE-5911: Add MemoryIndex.freeze() to allow thread-safe searching over a
|
||||
MemoryIndex. (Alan Woodward)
|
||||
MemoryIndex. (Alan Woodward, David Smiley, Robert Muir)
|
||||
|
||||
* LUCENE-5969: Lucene 5.0 has a new index format with mismatched file detection,
|
||||
improved exception handling, and indirect norms encoding for sparse fields.
|
||||
|
|
|
@ -208,6 +208,8 @@ public class MemoryIndex {
|
|||
private Counter bytesUsed;
|
||||
|
||||
private boolean frozen = false;
|
||||
|
||||
private Similarity normSimilarity = IndexSearcher.getDefaultSimilarity();
|
||||
|
||||
/**
|
||||
* Sorts term entries into ascending order; also works for
|
||||
|
@ -499,6 +501,15 @@ public class MemoryIndex {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the Similarity to be used for calculating field norms
|
||||
*/
|
||||
public void setSimilarity(Similarity similarity) {
|
||||
if (frozen)
|
||||
throw new IllegalArgumentException("Cannot set Similarity when MemoryIndex is frozen");
|
||||
this.normSimilarity = similarity;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates and returns a searcher that can be used to execute arbitrary
|
||||
* Lucene queries and to collect the resulting query results as hits.
|
||||
|
@ -508,7 +519,7 @@ public class MemoryIndex {
|
|||
public IndexSearcher createSearcher() {
|
||||
MemoryIndexReader reader = new MemoryIndexReader();
|
||||
IndexSearcher searcher = new IndexSearcher(reader); // ensures no auto-close !!
|
||||
reader.setSearcher(searcher); // to later get hold of searcher.getSimilarity()
|
||||
searcher.setSimilarity(normSimilarity);
|
||||
return searcher;
|
||||
}
|
||||
|
||||
|
@ -524,6 +535,7 @@ public class MemoryIndex {
|
|||
for (Map.Entry<String,Info> info : sortedFields) {
|
||||
info.getValue().sortTerms();
|
||||
}
|
||||
calculateNormValues();
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -744,8 +756,6 @@ public class MemoryIndex {
|
|||
*/
|
||||
private final class MemoryIndexReader extends LeafReader {
|
||||
|
||||
private IndexSearcher searcher; // needed to find searcher.getSimilarity()
|
||||
|
||||
private MemoryIndexReader() {
|
||||
super(); // avoid as much superclass baggage as possible
|
||||
}
|
||||
|
@ -1169,15 +1179,6 @@ public class MemoryIndex {
|
|||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
private Similarity getSimilarity() {
|
||||
if (searcher != null) return searcher.getSimilarity();
|
||||
return IndexSearcher.getDefaultSimilarity();
|
||||
}
|
||||
|
||||
private void setSearcher(IndexSearcher searcher) {
|
||||
this.searcher = searcher;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int numDocs() {
|
||||
|
@ -1202,33 +1203,35 @@ public class MemoryIndex {
|
|||
if (DEBUG) System.err.println("MemoryIndexReader.doClose");
|
||||
}
|
||||
|
||||
/** performance hack: cache norms to avoid repeated expensive calculations */
|
||||
private NumericDocValues cachedNormValues;
|
||||
private String cachedFieldName;
|
||||
private Similarity cachedSimilarity;
|
||||
|
||||
@Override
|
||||
public NumericDocValues getNormValues(String field) {
|
||||
FieldInfo fieldInfo = fieldInfos.get(field);
|
||||
if (fieldInfo == null || fieldInfo.omitsNorms())
|
||||
return null;
|
||||
NumericDocValues norms = cachedNormValues;
|
||||
Similarity sim = getSimilarity();
|
||||
if (!field.equals(cachedFieldName) || sim != cachedSimilarity) { // not cached?
|
||||
Info info = getInfo(field);
|
||||
int numTokens = info != null ? info.numTokens : 0;
|
||||
int numOverlapTokens = info != null ? info.numOverlapTokens : 0;
|
||||
float boost = info != null ? info.getBoost() : 1.0f;
|
||||
FieldInvertState invertState = new FieldInvertState(field, 0, numTokens, numOverlapTokens, 0, boost);
|
||||
long value = sim.computeNorm(invertState);
|
||||
norms = new MemoryIndexNormDocValues(value);
|
||||
// cache it for future reuse
|
||||
cachedNormValues = norms;
|
||||
cachedFieldName = field;
|
||||
cachedSimilarity = sim;
|
||||
if (DEBUG) System.err.println("MemoryIndexReader.norms: " + field + ":" + value + ":" + numTokens);
|
||||
}
|
||||
return norms;
|
||||
if (norms == null)
|
||||
return calculateFieldNormValue(field);
|
||||
return norms.get(field);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private Map<String, NumericDocValues> norms = null;
|
||||
|
||||
private NumericDocValues calculateFieldNormValue(String field) {
|
||||
FieldInfo fieldInfo = fieldInfos.get(field);
|
||||
if (fieldInfo == null)
|
||||
return null;
|
||||
Info info = fields.get(field);
|
||||
int numTokens = info != null ? info.numTokens : 0;
|
||||
int numOverlapTokens = info != null ? info.numOverlapTokens : 0;
|
||||
float boost = info != null ? info.getBoost() : 1.0f;
|
||||
FieldInvertState invertState = new FieldInvertState(field, 0, numTokens, numOverlapTokens, 0, boost);
|
||||
long value = normSimilarity.computeNorm(invertState);
|
||||
if (DEBUG) System.err.println("MemoryIndexReader.norms: " + field + ":" + value + ":" + numTokens);
|
||||
return new MemoryIndexNormDocValues(value);
|
||||
}
|
||||
|
||||
private void calculateNormValues() {
|
||||
norms = new HashMap<>();
|
||||
for (String field : fieldInfos.keySet()) {
|
||||
norms.put(field, calculateFieldNormValue(field));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1239,6 +1242,8 @@ public class MemoryIndex {
|
|||
this.fieldInfos.clear();
|
||||
this.fields.clear();
|
||||
this.sortedFields = null;
|
||||
this.norms = null;
|
||||
this.normSimilarity = IndexSearcher.getDefaultSimilarity();
|
||||
byteBlockPool.reset(false, false); // no need to 0-fill the buffers
|
||||
intBlockPool.reset(true, false); // here must must 0-fill since we use slices
|
||||
this.frozen = false;
|
||||
|
|
|
@ -18,13 +18,20 @@ package org.apache.lucene.index.memory;
|
|||
*/
|
||||
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.FieldInvertState;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.MatchAllDocsQuery;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.search.similarities.BM25Similarity;
|
||||
import org.apache.lucene.search.similarities.DefaultSimilarity;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import static org.hamcrest.CoreMatchers.is;
|
||||
import static org.hamcrest.CoreMatchers.not;
|
||||
import static org.junit.internal.matchers.StringContains.containsString;
|
||||
|
@ -63,6 +70,14 @@ public class TestMemoryIndex extends LuceneTestCase {
|
|||
assertThat(e.getMessage(), containsString("frozen"));
|
||||
}
|
||||
|
||||
try {
|
||||
mi.setSimilarity(new BM25Similarity(1, 1));
|
||||
fail("Expected an IllegalArgumentException when setting the Similarity after calling freeze()");
|
||||
}
|
||||
catch (RuntimeException e) {
|
||||
assertThat(e.getMessage(), containsString("frozen"));
|
||||
}
|
||||
|
||||
assertThat(mi.search(new TermQuery(new Term("f1", "some"))), not(is(0.0f)));
|
||||
|
||||
mi.reset();
|
||||
|
@ -70,6 +85,32 @@ public class TestMemoryIndex extends LuceneTestCase {
|
|||
assertThat(mi.search(new TermQuery(new Term("f1", "some"))), is(0.0f));
|
||||
assertThat(mi.search(new TermQuery(new Term("f1", "wibble"))), not(is(0.0f)));
|
||||
|
||||
// check we can set the Similarity again
|
||||
mi.setSimilarity(new DefaultSimilarity());
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSimilarities() throws IOException {
|
||||
|
||||
MemoryIndex mi = new MemoryIndex();
|
||||
mi.addField("f1", "a long text field that contains many many terms", analyzer);
|
||||
|
||||
IndexSearcher searcher = mi.createSearcher();
|
||||
LeafReader reader = (LeafReader) searcher.getIndexReader();
|
||||
float n1 = reader.getNormValues("f1").get(0);
|
||||
|
||||
// Norms aren't cached, so we can change the Similarity
|
||||
mi.setSimilarity(new DefaultSimilarity() {
|
||||
@Override
|
||||
public float lengthNorm(FieldInvertState state) {
|
||||
return 74;
|
||||
}
|
||||
});
|
||||
float n2 = reader.getNormValues("f1").get(0);
|
||||
|
||||
assertTrue(n1 != n2);
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue