Moving join() helper function from SuggestUtils to WordScorer; also removes getDirectSpellChecker() from SuggestUtils

2025-03-01 00:19:11 +00:00 · 2016-08-10 13:06:43 +02:00 · 2016-08-10 13:06:43 +02:00 · e6d57af0c5
commit e6d57af0c5
parent cdc77648a1
7 changed files with 22 additions and 58 deletions
--- a/core/src/main/java/org/elasticsearch/search/suggest/SuggestUtils.java
+++ b/core/src/main/java/org/elasticsearch/search/suggest/SuggestUtils.java
@ -22,61 +22,18 @@ import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.search.spell.DirectSpellChecker;
-import org.apache.lucene.search.spell.SuggestWord;
-import org.apache.lucene.search.spell.SuggestWordFrequencyComparator;
-import org.apache.lucene.search.spell.SuggestWordQueue;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefBuilder;
 import org.elasticsearch.common.ParseField;

 import java.io.IOException;
-import java.util.Comparator;

 public final class SuggestUtils {
-    private static final Comparator<SuggestWord> LUCENE_FREQUENCY = new SuggestWordFrequencyComparator();
-    private static final Comparator<SuggestWord> SCORE_COMPARATOR = SuggestWordQueue.DEFAULT_COMPARATOR;

    private SuggestUtils() {
        // utils!!
    }

-    public static DirectSpellChecker getDirectSpellChecker(DirectSpellcheckerSettings suggestion) {
-        DirectSpellChecker directSpellChecker = new DirectSpellChecker();
-        directSpellChecker.setAccuracy(suggestion.accuracy());
-        Comparator<SuggestWord> comparator;
-        switch (suggestion.sort()) {
-            case SCORE:
-                comparator = SCORE_COMPARATOR;
-                break;
-            case FREQUENCY:
-                comparator = LUCENE_FREQUENCY;
-                break;
-            default:
-                throw new IllegalArgumentException("Illegal suggest sort: " + suggestion.sort());
-        }
-        directSpellChecker.setComparator(comparator);
-        directSpellChecker.setDistance(suggestion.stringDistance());
-        directSpellChecker.setMaxEdits(suggestion.maxEdits());
-        directSpellChecker.setMaxInspections(suggestion.maxInspections());
-        directSpellChecker.setMaxQueryFrequency(suggestion.maxTermFreq());
-        directSpellChecker.setMinPrefix(suggestion.prefixLength());
-        directSpellChecker.setMinQueryLength(suggestion.minWordLength());
-        directSpellChecker.setThresholdFrequency(suggestion.minDocFreq());
-        directSpellChecker.setLowerCaseTerms(false);
-        return directSpellChecker;
-    }
-
-    public static BytesRef join(BytesRef separator, BytesRefBuilder result, BytesRef... toJoin) {
-        result.clear();
-        for (int i = 0; i < toJoin.length - 1; i++) {
-            result.append(toJoin[i]);
-            result.append(separator);
-        }
-        result.append(toJoin[toJoin.length-1]);
-        return result.get();
-    }
-
    public abstract static class TokenConsumer {
        protected CharTermAttribute charTermAttr;
        protected PositionIncrementAttribute posIncAttr;
--- a/core/src/main/java/org/elasticsearch/search/suggest/phrase/Correction.java
+++ b/core/src/main/java/org/elasticsearch/search/suggest/phrase/Correction.java
@ -20,7 +20,6 @@ package org.elasticsearch.search.suggest.phrase;

 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefBuilder;
-import org.elasticsearch.search.suggest.SuggestUtils;
 import org.elasticsearch.search.suggest.phrase.DirectCandidateGenerator.Candidate;

 import java.util.Arrays;
@ -73,7 +72,7 @@ public final class Correction implements Comparable<Correction> {
            len += toJoin[i].length;
        }
        result.grow(len);
-        return SuggestUtils.join(separator, result, toJoin);
+        return WordScorer.join(separator, result, toJoin);
    }

    /** Lower scores sorts first; if scores are equal,
--- a/core/src/main/java/org/elasticsearch/search/suggest/phrase/DirectCandidateGeneratorBuilder.java
+++ b/core/src/main/java/org/elasticsearch/search/suggest/phrase/DirectCandidateGeneratorBuilder.java
@ -449,7 +449,8 @@ public final class DirectCandidateGeneratorBuilder implements CandidateGenerator
            return new LuceneLevenshteinDistance();
        } else if ("levenstein".equals(distanceVal)) {
            return new LevensteinDistance();
-            //TODO Jaro and Winkler are 2 people - so apply same naming logic as damerau_levenshtein
+            // TODO Jaro and Winkler are 2 people - so apply same naming logic
+            // as damerau_levenshtein
        } else if ("jarowinkler".equals(distanceVal)) {
            return new JaroWinklerDistance();
        } else if ("ngram".equals(distanceVal)) {
--- a/core/src/main/java/org/elasticsearch/search/suggest/phrase/LaplaceScorer.java
+++ b/core/src/main/java/org/elasticsearch/search/suggest/phrase/LaplaceScorer.java
@ -21,7 +21,6 @@ package org.elasticsearch.search.suggest.phrase;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.util.BytesRef;
-import org.elasticsearch.search.suggest.SuggestUtils;
 import org.elasticsearch.search.suggest.phrase.DirectCandidateGenerator.Candidate;

 import java.io.IOException;
@ -41,15 +40,15 @@ final class LaplaceScorer extends WordScorer {

    @Override
    protected double scoreBigram(Candidate word, Candidate w_1) throws IOException {
-        SuggestUtils.join(separator, spare, w_1.term, word.term);
+        join(separator, spare, w_1.term, word.term);
        return (alpha + frequency(spare.get())) / (alpha +  w_1.frequency + vocabluarySize);
    }

    @Override
    protected double scoreTrigram(Candidate word, Candidate w_1, Candidate w_2) throws IOException {
-        SuggestUtils.join(separator, spare, w_2.term, w_1.term, word.term);
+        join(separator, spare, w_2.term, w_1.term, word.term);
        long trigramCount = frequency(spare.get());
-        SuggestUtils.join(separator, spare, w_1.term, word.term);
+        join(separator, spare, w_1.term, word.term);
        return (alpha + trigramCount) / (alpha  +  frequency(spare.get()) + vocabluarySize);
    }

--- a/core/src/main/java/org/elasticsearch/search/suggest/phrase/LinearInterpolatingScorer.java
+++ b/core/src/main/java/org/elasticsearch/search/suggest/phrase/LinearInterpolatingScorer.java
@ -21,7 +21,6 @@ package org.elasticsearch.search.suggest.phrase;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.util.BytesRef;
-import org.elasticsearch.search.suggest.SuggestUtils;
 import org.elasticsearch.search.suggest.phrase.DirectCandidateGenerator.Candidate;

 import java.io.IOException;
@ -56,7 +55,7 @@ public final class LinearInterpolatingScorer extends WordScorer {

    @Override
    protected double scoreBigram(Candidate word, Candidate w_1) throws IOException {
-        SuggestUtils.join(separator, spare, w_1.term, word.term);
+        join(separator, spare, w_1.term, word.term);
        final long count = frequency(spare.get());
        if (count < 1) {
            return unigramLambda * scoreUnigram(word);
@ -66,12 +65,12 @@ public final class LinearInterpolatingScorer extends WordScorer {

    @Override
    protected double scoreTrigram(Candidate w, Candidate w_1, Candidate w_2) throws IOException {
-        SuggestUtils.join(separator, spare, w.term, w_1.term, w_2.term);
+        join(separator, spare, w.term, w_1.term, w_2.term);
        final long count = frequency(spare.get());
        if (count < 1) {
            return scoreBigram(w, w_1);
        }
-        SuggestUtils.join(separator, spare, w.term, w_1.term);
+        join(separator, spare, w.term, w_1.term);
        return trigramLambda * (count / (1.d + frequency(spare.get()))) + scoreBigram(w, w_1);
    }

--- a/core/src/main/java/org/elasticsearch/search/suggest/phrase/StupidBackoffScorer.java
+++ b/core/src/main/java/org/elasticsearch/search/suggest/phrase/StupidBackoffScorer.java
@ -21,7 +21,6 @@ package org.elasticsearch.search.suggest.phrase;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.util.BytesRef;
-import org.elasticsearch.search.suggest.SuggestUtils;
 import org.elasticsearch.search.suggest.phrase.DirectCandidateGenerator.Candidate;

 import java.io.IOException;
@ -41,7 +40,7 @@ class StupidBackoffScorer extends WordScorer {

    @Override
    protected double scoreBigram(Candidate word, Candidate w_1) throws IOException {
-        SuggestUtils.join(separator, spare, w_1.term, word.term);
+        join(separator, spare, w_1.term, word.term);
        final long count = frequency(spare.get());
        if (count < 1) {
            return discount * scoreUnigram(word);
@ -53,12 +52,12 @@ class StupidBackoffScorer extends WordScorer {
    protected double scoreTrigram(Candidate w, Candidate w_1, Candidate w_2) throws IOException {
        // First see if there are bigrams.  If there aren't then skip looking up the trigram.  This saves lookups
        // when the bigrams and trigrams are rare and we need both anyway.
-        SuggestUtils.join(separator, spare, w_1.term, w.term);
+        join(separator, spare, w_1.term, w.term);
        long bigramCount = frequency(spare.get());
        if (bigramCount < 1) {
            return discount * scoreUnigram(w);
        }
-        SuggestUtils.join(separator, spare, w_2.term, w_1.term, w.term);
+        join(separator, spare, w_2.term, w_1.term, w.term);
        long trigramCount = frequency(spare.get());
        if (trigramCount < 1) {
            return discount * (bigramCount / (w_1.frequency + 0.00000000001d));
--- a/core/src/main/java/org/elasticsearch/search/suggest/phrase/WordScorer.java
+++ b/core/src/main/java/org/elasticsearch/search/suggest/phrase/WordScorer.java
@ -100,6 +100,16 @@ public abstract class WordScorer {
       return scoreBigram(word, w_1);
   }

+   public static BytesRef join(BytesRef separator, BytesRefBuilder result, BytesRef... toJoin) {
+       result.clear();
+       for (int i = 0; i < toJoin.length - 1; i++) {
+           result.append(toJoin[i]);
+           result.append(separator);
+       }
+       result.append(toJoin[toJoin.length-1]);
+       return result.get();
+   }
+
   public interface WordScorerFactory {
       WordScorer newScorer(IndexReader reader, Terms terms,
                            String field, double realWordLikelyhood, BytesRef separator) throws IOException;