LUCENE-5911: Remove cacheing of norms, calculate up front in freeze()

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1633321 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Alan Woodward 2014-10-21 09:11:51 +00:00
parent fc3b1b07da
commit 2e1c562b4b
3 changed files with 84 additions and 38 deletions

View File

@ -71,7 +71,7 @@ New Features
Robert Muir)
* LUCENE-5911: Add MemoryIndex.freeze() to allow thread-safe searching over a
MemoryIndex. (Alan Woodward)
MemoryIndex. (Alan Woodward, David Smiley, Robert Muir)
* LUCENE-5969: Lucene 5.0 has a new index format with mismatched file detection,
improved exception handling, and indirect norms encoding for sparse fields.

View File

@ -208,6 +208,8 @@ public class MemoryIndex {
private Counter bytesUsed;
private boolean frozen = false;
private Similarity normSimilarity = IndexSearcher.getDefaultSimilarity();
/**
* Sorts term entries into ascending order; also works for
@ -499,6 +501,15 @@ public class MemoryIndex {
}
}
/**
* Set the Similarity to be used for calculating field norms
*/
public void setSimilarity(Similarity similarity) {
if (frozen)
throw new IllegalArgumentException("Cannot set Similarity when MemoryIndex is frozen");
this.normSimilarity = similarity;
}
/**
* Creates and returns a searcher that can be used to execute arbitrary
* Lucene queries and to collect the resulting query results as hits.
@ -508,7 +519,7 @@ public class MemoryIndex {
public IndexSearcher createSearcher() {
MemoryIndexReader reader = new MemoryIndexReader();
IndexSearcher searcher = new IndexSearcher(reader); // ensures no auto-close !!
reader.setSearcher(searcher); // to later get hold of searcher.getSimilarity()
searcher.setSimilarity(normSimilarity);
return searcher;
}
@ -524,6 +535,7 @@ public class MemoryIndex {
for (Map.Entry<String,Info> info : sortedFields) {
info.getValue().sortTerms();
}
calculateNormValues();
}
/**
@ -744,8 +756,6 @@ public class MemoryIndex {
*/
private final class MemoryIndexReader extends LeafReader {
private IndexSearcher searcher; // needed to find searcher.getSimilarity()
private MemoryIndexReader() {
super(); // avoid as much superclass baggage as possible
}
@ -1169,15 +1179,6 @@ public class MemoryIndex {
return null;
}
}
private Similarity getSimilarity() {
if (searcher != null) return searcher.getSimilarity();
return IndexSearcher.getDefaultSimilarity();
}
private void setSearcher(IndexSearcher searcher) {
this.searcher = searcher;
}
@Override
public int numDocs() {
@ -1202,33 +1203,35 @@ public class MemoryIndex {
if (DEBUG) System.err.println("MemoryIndexReader.doClose");
}
/** performance hack: cache norms to avoid repeated expensive calculations */
private NumericDocValues cachedNormValues;
private String cachedFieldName;
private Similarity cachedSimilarity;
@Override
public NumericDocValues getNormValues(String field) {
FieldInfo fieldInfo = fieldInfos.get(field);
if (fieldInfo == null || fieldInfo.omitsNorms())
return null;
NumericDocValues norms = cachedNormValues;
Similarity sim = getSimilarity();
if (!field.equals(cachedFieldName) || sim != cachedSimilarity) { // not cached?
Info info = getInfo(field);
int numTokens = info != null ? info.numTokens : 0;
int numOverlapTokens = info != null ? info.numOverlapTokens : 0;
float boost = info != null ? info.getBoost() : 1.0f;
FieldInvertState invertState = new FieldInvertState(field, 0, numTokens, numOverlapTokens, 0, boost);
long value = sim.computeNorm(invertState);
norms = new MemoryIndexNormDocValues(value);
// cache it for future reuse
cachedNormValues = norms;
cachedFieldName = field;
cachedSimilarity = sim;
if (DEBUG) System.err.println("MemoryIndexReader.norms: " + field + ":" + value + ":" + numTokens);
}
return norms;
if (norms == null)
return calculateFieldNormValue(field);
return norms.get(field);
}
}
private Map<String, NumericDocValues> norms = null;
private NumericDocValues calculateFieldNormValue(String field) {
FieldInfo fieldInfo = fieldInfos.get(field);
if (fieldInfo == null)
return null;
Info info = fields.get(field);
int numTokens = info != null ? info.numTokens : 0;
int numOverlapTokens = info != null ? info.numOverlapTokens : 0;
float boost = info != null ? info.getBoost() : 1.0f;
FieldInvertState invertState = new FieldInvertState(field, 0, numTokens, numOverlapTokens, 0, boost);
long value = normSimilarity.computeNorm(invertState);
if (DEBUG) System.err.println("MemoryIndexReader.norms: " + field + ":" + value + ":" + numTokens);
return new MemoryIndexNormDocValues(value);
}
private void calculateNormValues() {
norms = new HashMap<>();
for (String field : fieldInfos.keySet()) {
norms.put(field, calculateFieldNormValue(field));
}
}
@ -1239,6 +1242,8 @@ public class MemoryIndex {
this.fieldInfos.clear();
this.fields.clear();
this.sortedFields = null;
this.norms = null;
this.normSimilarity = IndexSearcher.getDefaultSimilarity();
byteBlockPool.reset(false, false); // no need to 0-fill the buffers
intBlockPool.reset(true, false); // here must must 0-fill since we use slices
this.frozen = false;

View File

@ -18,13 +18,20 @@ package org.apache.lucene.index.memory;
*/
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.FieldInvertState;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.similarities.BM25Similarity;
import org.apache.lucene.search.similarities.DefaultSimilarity;
import org.apache.lucene.util.LuceneTestCase;
import org.junit.Before;
import org.junit.Test;
import java.io.IOException;
import static org.hamcrest.CoreMatchers.is;
import static org.hamcrest.CoreMatchers.not;
import static org.junit.internal.matchers.StringContains.containsString;
@ -63,6 +70,14 @@ public class TestMemoryIndex extends LuceneTestCase {
assertThat(e.getMessage(), containsString("frozen"));
}
try {
mi.setSimilarity(new BM25Similarity(1, 1));
fail("Expected an IllegalArgumentException when setting the Similarity after calling freeze()");
}
catch (RuntimeException e) {
assertThat(e.getMessage(), containsString("frozen"));
}
assertThat(mi.search(new TermQuery(new Term("f1", "some"))), not(is(0.0f)));
mi.reset();
@ -70,6 +85,32 @@ public class TestMemoryIndex extends LuceneTestCase {
assertThat(mi.search(new TermQuery(new Term("f1", "some"))), is(0.0f));
assertThat(mi.search(new TermQuery(new Term("f1", "wibble"))), not(is(0.0f)));
// check we can set the Similarity again
mi.setSimilarity(new DefaultSimilarity());
}
@Test
public void testSimilarities() throws IOException {
MemoryIndex mi = new MemoryIndex();
mi.addField("f1", "a long text field that contains many many terms", analyzer);
IndexSearcher searcher = mi.createSearcher();
LeafReader reader = (LeafReader) searcher.getIndexReader();
float n1 = reader.getNormValues("f1").get(0);
// Norms aren't cached, so we can change the Similarity
mi.setSimilarity(new DefaultSimilarity() {
@Override
public float lengthNorm(FieldInvertState state) {
return 74;
}
});
float n2 = reader.getNormValues("f1").get(0);
assertTrue(n1 != n2);
}