LUCENE-1260: use the provided Similarity to encode norms

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1056821 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2011-01-08 22:31:00 +00:00
parent 4f7eba849a
commit eacd3d2636
4 changed files with 60 additions and 3 deletions

View File

@ -37,7 +37,7 @@ import org.apache.lucene.search.Similarity;
final class NormsWriter extends InvertedDocEndConsumer {
private static final byte defaultNorm = Similarity.getDefault().encodeNormValue(1.0f);
private final byte defaultNorm = Similarity.getDefault().encodeNormValue(1.0f);
private FieldInfos fieldInfos;
@Override
public InvertedDocEndConsumerPerThread addThread(DocInverterPerThread docInverterPerThread) {

View File

@ -73,7 +73,7 @@ final class NormsWriterPerField extends InvertedDocEndConsumerPerField implement
norms = ArrayUtil.grow(norms, 1+upto);
}
final float norm = docState.similarity.computeNorm(fieldInfo.name, fieldState);
norms[upto] = Similarity.getDefault().encodeNormValue(norm);
norms[upto] = docState.similarity.encodeNormValue(norm);
docIDs[upto] = docState.docID;
upto++;
}

View File

@ -565,6 +565,11 @@ public abstract class Similarity implements Serializable {
}
/** Decodes a normalization factor stored in an index.
* <p>
* <b>WARNING: If you override this method, you should change the default
* Similarity to your implementation with {@link Similarity#setDefault(Similarity)}.
* Otherwise, your method may not always be called, especially if you omit norms
* for some fields.</b>
* @see #encodeNormValue(float)
*/
public float decodeNormValue(byte b) {
@ -657,7 +662,11 @@ public abstract class Similarity implements Serializable {
* are rounded down to the largest representable value. Positive values too
* small to represent are rounded up to the smallest positive representable
* value.
*
* <p>
* <b>WARNING: If you override this method, you should change the default
* Similarity to your implementation with {@link Similarity#setDefault(Similarity)}.
* Otherwise, your method may not always be called, especially if you omit norms
* for some fields.</b>
* @see org.apache.lucene.document.Field#setBoost(float)
* @see org.apache.lucene.util.SmallFloat
*/

View File

@ -237,4 +237,52 @@ public class TestNorms extends LuceneTestCase {
return norm;
}
/**
 * Similarity that replaces the inherited norm computation and norm
 * encoding/decoding with deliberately simple versions: the norm of a field is
 * the length reported by its {@link FieldInvertState}, and norms are converted
 * to and from their single-byte form with plain casts.
 */
class CustomNormEncodingSimilarity extends DefaultSimilarity {
  /** Returns the length reported by {@code state}, converted to a float. */
  @Override
  public float computeNorm(String field, FieldInvertState state) {
    final float lengthAsNorm = (float) state.getLength();
    return lengthAsNorm;
  }

  /** Encodes {@code f} with a narrowing cast, so any fractional part is discarded. */
  @Override
  public byte encodeNormValue(float f) {
    final byte encoded = (byte) f;
    return encoded;
  }

  /** Decodes {@code b} by widening it back to a float. */
  @Override
  public float decodeNormValue(byte b) {
    // byte -> float is an implicit widening conversion; no explicit cast is needed.
    return b;
  }
}
/**
 * LUCENE-1260: indexes 100 copies of a document through a writer configured
 * with {@link CustomNormEncodingSimilarity} and checks the norm bytes read
 * back for each field.
 * <p>
 * Field "foo" is always the empty string and is expected to have norm byte 0
 * for every document; field "bar" is always "singleton" and is expected to
 * have norm byte 1 for every document.
 */
public void testCustomEncoder() throws Exception {
  final int docCount = 100;

  Directory directory = newDirectory();
  IndexWriterConfig writerConfig = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer());
  writerConfig.setSimilarity(new CustomNormEncodingSimilarity());
  RandomIndexWriter indexWriter = new RandomIndexWriter(random, directory, writerConfig);

  // One Document instance, holding both fields, is reused for every addDocument call.
  Document document = new Document();
  Field emptyField = newField("foo", "", Field.Store.NO, Field.Index.ANALYZED);
  Field singleTermField = newField("bar", "", Field.Store.NO, Field.Index.ANALYZED);
  document.add(emptyField);
  document.add(singleTermField);

  int added = 0;
  while (added < docCount) {
    singleTermField.setValue("singleton");
    indexWriter.addDocument(document);
    added++;
  }

  // Obtain the reader from the writer first, then close the writer.
  IndexReader indexReader = indexWriter.getReader();
  indexWriter.close();

  final byte[] emptyFieldNorms = MultiNorms.norms(indexReader, "foo");
  for (int docID = 0; docID < indexReader.maxDoc(); docID++) {
    assertEquals(0, emptyFieldNorms[docID]);
  }

  final byte[] singleTermFieldNorms = MultiNorms.norms(indexReader, "bar");
  for (int docID = 0; docID < indexReader.maxDoc(); docID++) {
    assertEquals(1, singleTermFieldNorms[docID]);
  }

  indexReader.close();
  directory.close();
}
}