mirror of https://github.com/apache/lucene.git
LUCENE-1260: use the provided sim to encode norms
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1056821 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
4f7eba849a
commit
eacd3d2636
|
@ -37,7 +37,7 @@ import org.apache.lucene.search.Similarity;
|
||||||
|
|
||||||
final class NormsWriter extends InvertedDocEndConsumer {
|
final class NormsWriter extends InvertedDocEndConsumer {
|
||||||
|
|
||||||
private static final byte defaultNorm = Similarity.getDefault().encodeNormValue(1.0f);
|
private final byte defaultNorm = Similarity.getDefault().encodeNormValue(1.0f);
|
||||||
private FieldInfos fieldInfos;
|
private FieldInfos fieldInfos;
|
||||||
@Override
|
@Override
|
||||||
public InvertedDocEndConsumerPerThread addThread(DocInverterPerThread docInverterPerThread) {
|
public InvertedDocEndConsumerPerThread addThread(DocInverterPerThread docInverterPerThread) {
|
||||||
|
|
|
@ -73,7 +73,7 @@ final class NormsWriterPerField extends InvertedDocEndConsumerPerField implement
|
||||||
norms = ArrayUtil.grow(norms, 1+upto);
|
norms = ArrayUtil.grow(norms, 1+upto);
|
||||||
}
|
}
|
||||||
final float norm = docState.similarity.computeNorm(fieldInfo.name, fieldState);
|
final float norm = docState.similarity.computeNorm(fieldInfo.name, fieldState);
|
||||||
norms[upto] = Similarity.getDefault().encodeNormValue(norm);
|
norms[upto] = docState.similarity.encodeNormValue(norm);
|
||||||
docIDs[upto] = docState.docID;
|
docIDs[upto] = docState.docID;
|
||||||
upto++;
|
upto++;
|
||||||
}
|
}
|
||||||
|
|
|
@ -565,6 +565,11 @@ public abstract class Similarity implements Serializable {
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Decodes a normalization factor stored in an index.
|
/** Decodes a normalization factor stored in an index.
|
||||||
|
* <p>
|
||||||
|
* <b>WARNING: If you override this method, you should change the default
|
||||||
|
* Similarity to your implementation with {@link Similarity#setDefault(Similarity)}.
|
||||||
|
* Otherwise, your method may not always be called, especially if you omit norms
|
||||||
|
* for some fields.</b>
|
||||||
* @see #encodeNormValue(float)
|
* @see #encodeNormValue(float)
|
||||||
*/
|
*/
|
||||||
public float decodeNormValue(byte b) {
|
public float decodeNormValue(byte b) {
|
||||||
|
@ -657,7 +662,11 @@ public abstract class Similarity implements Serializable {
|
||||||
* are rounded down to the largest representable value. Positive values too
|
* are rounded down to the largest representable value. Positive values too
|
||||||
* small to represent are rounded up to the smallest positive representable
|
* small to represent are rounded up to the smallest positive representable
|
||||||
* value.
|
* value.
|
||||||
*
|
* <p>
|
||||||
|
* <b>WARNING: If you override this method, you should change the default
|
||||||
|
* Similarity to your implementation with {@link Similarity#setDefault(Similarity)}.
|
||||||
|
* Otherwise, your method may not always be called, especially if you omit norms
|
||||||
|
* for some fields.</b>
|
||||||
* @see org.apache.lucene.document.Field#setBoost(float)
|
* @see org.apache.lucene.document.Field#setBoost(float)
|
||||||
* @see org.apache.lucene.util.SmallFloat
|
* @see org.apache.lucene.util.SmallFloat
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -237,4 +237,52 @@ public class TestNorms extends LuceneTestCase {
|
||||||
return norm;
|
return norm;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
class CustomNormEncodingSimilarity extends DefaultSimilarity {
|
||||||
|
@Override
|
||||||
|
public byte encodeNormValue(float f) {
|
||||||
|
return (byte) f;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public float decodeNormValue(byte b) {
|
||||||
|
return (float) b;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public float computeNorm(String field, FieldInvertState state) {
|
||||||
|
return (float) state.getLength();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// LUCENE-1260
|
||||||
|
public void testCustomEncoder() throws Exception {
|
||||||
|
Directory dir = newDirectory();
|
||||||
|
IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer());
|
||||||
|
config.setSimilarity(new CustomNormEncodingSimilarity());
|
||||||
|
RandomIndexWriter writer = new RandomIndexWriter(random, dir, config);
|
||||||
|
Document doc = new Document();
|
||||||
|
Field foo = newField("foo", "", Field.Store.NO, Field.Index.ANALYZED);
|
||||||
|
Field bar = newField("bar", "", Field.Store.NO, Field.Index.ANALYZED);
|
||||||
|
doc.add(foo);
|
||||||
|
doc.add(bar);
|
||||||
|
|
||||||
|
for (int i = 0; i < 100; i++) {
|
||||||
|
bar.setValue("singleton");
|
||||||
|
writer.addDocument(doc);
|
||||||
|
}
|
||||||
|
|
||||||
|
IndexReader reader = writer.getReader();
|
||||||
|
writer.close();
|
||||||
|
|
||||||
|
byte fooNorms[] = MultiNorms.norms(reader, "foo");
|
||||||
|
for (int i = 0; i < reader.maxDoc(); i++)
|
||||||
|
assertEquals(0, fooNorms[i]);
|
||||||
|
|
||||||
|
byte barNorms[] = MultiNorms.norms(reader, "bar");
|
||||||
|
for (int i = 0; i < reader.maxDoc(); i++)
|
||||||
|
assertEquals(1, barNorms[i]);
|
||||||
|
|
||||||
|
reader.close();
|
||||||
|
dir.close();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue