Use similarity.tf() in MoreLikeThis

This commit is contained in:
Petko Minkov 2022-06-01 11:13:10 -07:00
parent a383253fe1
commit 9b64cbb52d
1 changed file with 3 additions and 2 deletions

View File

@@ -637,8 +637,8 @@ public final class MoreLikeThis {
for (Map.Entry<String, Int> tfEntry : perWordTermFrequencies.entrySet()) { // for every word for (Map.Entry<String, Int> tfEntry : perWordTermFrequencies.entrySet()) { // for every word
String word = tfEntry.getKey(); String word = tfEntry.getKey();
int tf = tfEntry.getValue().x; // term freq in the source doc int termFreq = tfEntry.getValue().x; // term freq in the source doc
if (minTermFreq > 0 && tf < minTermFreq) { if (minTermFreq > 0 && termFreq < minTermFreq) {
continue; // filter out words that don't occur enough times in the source continue; // filter out words that don't occur enough times in the source
} }
@@ -656,6 +656,7 @@ public final class MoreLikeThis {
continue; // index update problem? continue; // index update problem?
} }
float tf = similarity.tf(termFreq);
float idf = similarity.idf(docFreq, numDocs); float idf = similarity.idf(docFreq, numDocs);
float score = tf * idf; float score = tf * idf;