From eacd3d2636d1baefd503e76c467d5ae07cfce728 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Sat, 8 Jan 2011 22:31:00 +0000 Subject: [PATCH] LUCENE-1260: use the provided sim to encode norms git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1056821 13f79535-47bb-0310-9956-ffa450edef68 --- .../org/apache/lucene/index/NormsWriter.java | 2 +- .../lucene/index/NormsWriterPerField.java | 2 +- .../org/apache/lucene/search/Similarity.java | 11 ++++- .../org/apache/lucene/index/TestNorms.java | 48 +++++++++++++++++++ 4 files changed, 60 insertions(+), 3 deletions(-) diff --git a/lucene/src/java/org/apache/lucene/index/NormsWriter.java b/lucene/src/java/org/apache/lucene/index/NormsWriter.java index c145227382b..036832297cf 100644 --- a/lucene/src/java/org/apache/lucene/index/NormsWriter.java +++ b/lucene/src/java/org/apache/lucene/index/NormsWriter.java @@ -37,7 +37,7 @@ import org.apache.lucene.search.Similarity; final class NormsWriter extends InvertedDocEndConsumer { - private static final byte defaultNorm = Similarity.getDefault().encodeNormValue(1.0f); + private final byte defaultNorm = Similarity.getDefault().encodeNormValue(1.0f); private FieldInfos fieldInfos; @Override public InvertedDocEndConsumerPerThread addThread(DocInverterPerThread docInverterPerThread) { diff --git a/lucene/src/java/org/apache/lucene/index/NormsWriterPerField.java b/lucene/src/java/org/apache/lucene/index/NormsWriterPerField.java index c2b331db057..b0827597097 100644 --- a/lucene/src/java/org/apache/lucene/index/NormsWriterPerField.java +++ b/lucene/src/java/org/apache/lucene/index/NormsWriterPerField.java @@ -73,7 +73,7 @@ final class NormsWriterPerField extends InvertedDocEndConsumerPerField implement norms = ArrayUtil.grow(norms, 1+upto); } final float norm = docState.similarity.computeNorm(fieldInfo.name, fieldState); - norms[upto] = Similarity.getDefault().encodeNormValue(norm); + norms[upto] = docState.similarity.encodeNormValue(norm); docIDs[upto] = docState.docID; 
upto++; } diff --git a/lucene/src/java/org/apache/lucene/search/Similarity.java b/lucene/src/java/org/apache/lucene/search/Similarity.java index 041db1e8ffa..e1278cb3605 100644 --- a/lucene/src/java/org/apache/lucene/search/Similarity.java +++ b/lucene/src/java/org/apache/lucene/search/Similarity.java @@ -565,6 +565,11 @@ public abstract class Similarity implements Serializable { } /** Decodes a normalization factor stored in an index. + * <p>
+ * WARNING: If you override this method, you should change the default + * Similarity to your implementation with {@link Similarity#setDefault(Similarity)}. + * Otherwise, your method may not always be called, especially if you omit norms + * for some fields. * @see #encodeNormValue(float) */ public float decodeNormValue(byte b) { @@ -657,7 +662,11 @@ public abstract class Similarity implements Serializable { * are rounded down to the largest representable value. Positive values too * small to represent are rounded up to the smallest positive representable * value. - * + * <p>
+ * WARNING: If you override this method, you should change the default + * Similarity to your implementation with {@link Similarity#setDefault(Similarity)}. + * Otherwise, your method may not always be called, especially if you omit norms + * for some fields. * @see org.apache.lucene.document.Field#setBoost(float) * @see org.apache.lucene.util.SmallFloat */ diff --git a/lucene/src/test/org/apache/lucene/index/TestNorms.java b/lucene/src/test/org/apache/lucene/index/TestNorms.java index ceeae1552fb..b7efe88fa35 100755 --- a/lucene/src/test/org/apache/lucene/index/TestNorms.java +++ b/lucene/src/test/org/apache/lucene/index/TestNorms.java @@ -237,4 +237,52 @@ public class TestNorms extends LuceneTestCase { return norm; } + class CustomNormEncodingSimilarity extends DefaultSimilarity { + @Override + public byte encodeNormValue(float f) { + return (byte) f; + } + + @Override + public float decodeNormValue(byte b) { + return (float) b; + } + + @Override + public float computeNorm(String field, FieldInvertState state) { + return (float) state.getLength(); + } + } + + // LUCENE-1260 + public void testCustomEncoder() throws Exception { + Directory dir = newDirectory(); + IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()); + config.setSimilarity(new CustomNormEncodingSimilarity()); + RandomIndexWriter writer = new RandomIndexWriter(random, dir, config); + Document doc = new Document(); + Field foo = newField("foo", "", Field.Store.NO, Field.Index.ANALYZED); + Field bar = newField("bar", "", Field.Store.NO, Field.Index.ANALYZED); + doc.add(foo); + doc.add(bar); + + for (int i = 0; i < 100; i++) { + bar.setValue("singleton"); + writer.addDocument(doc); + } + + IndexReader reader = writer.getReader(); + writer.close(); + + byte fooNorms[] = MultiNorms.norms(reader, "foo"); + for (int i = 0; i < reader.maxDoc(); i++) + assertEquals(0, fooNorms[i]); + + byte barNorms[] = MultiNorms.norms(reader, "bar"); + for (int i = 0; i < 
reader.maxDoc(); i++) + assertEquals(1, barNorms[i]); + + reader.close(); + dir.close(); + } }