From 2cf12b8cdcc629617b2d58c0a2a6336679ff9249 Mon Sep 17 00:00:00 2001
From: Weiming Wu
Date: Tue, 26 Jul 2022 07:47:52 -0400
Subject: [PATCH] Cache decoded bytes for TFIDFSimilarity scorer. (#1042)

Co-authored-by: Weiming Wu
---
 .../org/apache/lucene/search/TermInSetQuery.java    |  2 +-
 .../lucene/search/similarities/TFIDFSimilarity.java | 12 ++++++++++--
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/lucene/core/src/java/org/apache/lucene/search/TermInSetQuery.java b/lucene/core/src/java/org/apache/lucene/search/TermInSetQuery.java
index a77b735378e..fe7db0997df 100644
--- a/lucene/core/src/java/org/apache/lucene/search/TermInSetQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/TermInSetQuery.java
@@ -271,7 +271,7 @@ public class TermInSetQuery extends Query implements Accountable {
         TermIterator iterator = termData.iterator();
 
         // We will first try to collect up to 'threshold' terms into 'matchingTerms'
-        // if there are two many terms, we will fall back to building the 'builder'
+        // if there are too many terms, we will fall back to building the 'builder'
         final int threshold =
             Math.min(BOOLEAN_REWRITE_TERM_COUNT_THRESHOLD, IndexSearcher.getMaxClauseCount());
         assert termData.size() > threshold : "Query should have been rewritten";
diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/TFIDFSimilarity.java b/lucene/core/src/java/org/apache/lucene/search/similarities/TFIDFSimilarity.java
index 621ec3682ba..906c7dd2875 100644
--- a/lucene/core/src/java/org/apache/lucene/search/similarities/TFIDFSimilarity.java
+++ b/lucene/core/src/java/org/apache/lucene/search/similarities/TFIDFSimilarity.java
@@ -444,6 +444,15 @@ public abstract class TFIDFSimilarity extends Similarity {
    */
   public abstract float lengthNorm(int length);
 
+  /** Cache of decoded bytes. */
+  private static final int[] LENGTH_TABLE = new int[256];
+
+  static {
+    for (int i = 0; i < 256; i++) {
+      LENGTH_TABLE[i] = SmallFloat.byte4ToInt((byte) i);
+    }
+  }
+
   @Override
   public final long computeNorm(FieldInvertState state) {
     final int numTerms;
@@ -466,8 +475,7 @@ public abstract class TFIDFSimilarity extends Similarity {
             : idfExplain(collectionStats, termStats);
     float[] normTable = new float[256];
     for (int i = 1; i < 256; ++i) {
-      int length = SmallFloat.byte4ToInt((byte) i);
-      float norm = lengthNorm(length);
+      float norm = lengthNorm(LENGTH_TABLE[i]);
       normTable[i] = norm;
     }
     normTable[0] = 1f / normTable[255];