Cache decoded bytes for TFIDFSimilarity scorer. (#1042)

Co-authored-by: Weiming Wu <wweiming@amazon.com>
This commit is contained in:
Weiming Wu 2022-07-26 07:47:52 -04:00 committed by GitHub
parent 94960a0aff
commit 2cf12b8cdc
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 11 additions and 3 deletions

View File

@ -271,7 +271,7 @@ public class TermInSetQuery extends Query implements Accountable {
TermIterator iterator = termData.iterator();
// We will first try to collect up to 'threshold' terms into 'matchingTerms'
// if there are two many terms, we will fall back to building the 'builder'
// if there are too many terms, we will fall back to building the 'builder'
final int threshold =
Math.min(BOOLEAN_REWRITE_TERM_COUNT_THRESHOLD, IndexSearcher.getMaxClauseCount());
assert termData.size() > threshold : "Query should have been rewritten";

View File

@ -444,6 +444,15 @@ public abstract class TFIDFSimilarity extends Similarity {
*/
public abstract float lengthNorm(int length);
/**
 * Lookup table of decoded bytes: entry {@code b} holds the value returned by
 * {@code SmallFloat.byte4ToInt((byte) b)} for each index in {@code [0, 256)}.
 * Filled once by the static initializer below so callers can index the table
 * instead of decoding the same byte again.
 */
private static final int[] LENGTH_TABLE = new int[256];
static {
  // Walk every slot of the table; the index doubles as the byte to decode.
  int slot = 0;
  while (slot < LENGTH_TABLE.length) {
    LENGTH_TABLE[slot] = SmallFloat.byte4ToInt((byte) slot);
    slot++;
  }
}
@Override
public final long computeNorm(FieldInvertState state) {
final int numTerms;
@ -466,8 +475,7 @@ public abstract class TFIDFSimilarity extends Similarity {
: idfExplain(collectionStats, termStats);
float[] normTable = new float[256];
for (int i = 1; i < 256; ++i) {
int length = SmallFloat.byte4ToInt((byte) i);
float norm = lengthNorm(length);
float norm = lengthNorm(LENGTH_TABLE[i]);
normTable[i] = norm;
}
normTable[0] = 1f / normTable[255];