Moving join() helper function to WordScorer

2016-08-10 13:06:43 +02:00 · 2016-08-10 13:06:43 +02:00 · e6d57af0c5
parent cdc77648a1
commit e6d57af0c5
7 changed files with 22 additions and 58 deletions
--- a/core/src/main/java/org/elasticsearch/search/suggest/SuggestUtils.java
+++ b/core/src/main/java/org/elasticsearch/search/suggest/SuggestUtils.java
@ -22,61 +22,18 @@ import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.search.spell.DirectSpellChecker;
 import org.apache.lucene.search.spell.SuggestWord;
 import org.apache.lucene.search.spell.SuggestWordFrequencyComparator;
 import org.apache.lucene.search.spell.SuggestWordQueue;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefBuilder;
 import org.elasticsearch.common.ParseField;
 import java.io.IOException;
 import java.util.Comparator;
 public final class SuggestUtils {
    // Lucene's SuggestWordFrequencyComparator; getDirectSpellChecker selects it for the FREQUENCY sort.
    private static final Comparator<SuggestWord> LUCENE_FREQUENCY = new SuggestWordFrequencyComparator();
    // Lucene's SuggestWordQueue default comparator; getDirectSpellChecker selects it for the SCORE sort.
    private static final Comparator<SuggestWord> SCORE_COMPARATOR = SuggestWordQueue.DEFAULT_COMPARATOR;
    private SuggestUtils() {
        // Static utility holder: the private constructor keeps this class from being instantiated.
    }
    /**
     * Builds a Lucene {@link DirectSpellChecker} and copies every tunable from the
     * supplied settings onto it.
     *
     * @param suggestion the settings to read accuracy, sort order, string distance,
     *                   edit/inspection limits and frequency thresholds from
     * @return a freshly created spell checker carrying those settings; term
     *         lower-casing is always switched off
     * @throws IllegalArgumentException if the configured sort is neither
     *                                  {@code SCORE} nor {@code FREQUENCY}
     */
    public static DirectSpellChecker getDirectSpellChecker(DirectSpellcheckerSettings suggestion) {
        final DirectSpellChecker checker = new DirectSpellChecker();
        checker.setAccuracy(suggestion.accuracy());

        // Map the configured sort order onto the matching suggest-word comparator.
        switch (suggestion.sort()) {
            case SCORE:
                checker.setComparator(SCORE_COMPARATOR);
                break;
            case FREQUENCY:
                checker.setComparator(LUCENE_FREQUENCY);
                break;
            default:
                throw new IllegalArgumentException("Illegal suggest sort: " + suggestion.sort());
        }

        // Remaining settings are copied one-to-one.
        checker.setDistance(suggestion.stringDistance());
        checker.setMaxEdits(suggestion.maxEdits());
        checker.setMaxInspections(suggestion.maxInspections());
        checker.setMaxQueryFrequency(suggestion.maxTermFreq());
        checker.setMinPrefix(suggestion.prefixLength());
        checker.setMinQueryLength(suggestion.minWordLength());
        checker.setThresholdFrequency(suggestion.minDocFreq());
        checker.setLowerCaseTerms(false);
        return checker;
    }
    /**
     * Concatenates {@code toJoin} into {@code result}, placing {@code separator}
     * between consecutive elements.
     *
     * <p>{@code result} is cleared first, so any previous content is discarded.
     * An empty {@code toJoin} yields an empty result instead of failing with an
     * {@link ArrayIndexOutOfBoundsException}.
     *
     * @param separator bytes inserted between two consecutive elements
     * @param result    builder that receives the joined bytes; it is cleared before use
     * @param toJoin    the elements to concatenate, in order; may be empty
     * @return the value of {@code result.get()} after all elements were appended
     *         (NOTE(review): per the Lucene API this reference is backed by the
     *         builder, so later writes to {@code result} may change it - confirm
     *         before retaining it)
     */
    public static BytesRef join(BytesRef separator, BytesRefBuilder result, BytesRef... toJoin) {
        result.clear();
        for (int i = 0; i < toJoin.length; i++) {
            // The separator goes in front of every element except the first, which
            // also makes the zero-element case fall through safely.
            if (i > 0) {
                result.append(separator);
            }
            result.append(toJoin[i]);
        }
        return result.get();
    }
    public abstract static class TokenConsumer {
        protected CharTermAttribute charTermAttr;
        protected PositionIncrementAttribute posIncAttr;
--- a/core/src/main/java/org/elasticsearch/search/suggest/phrase/Correction.java
+++ b/core/src/main/java/org/elasticsearch/search/suggest/phrase/Correction.java
@ -20,7 +20,6 @@ package org.elasticsearch.search.suggest.phrase;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefBuilder;
 import org.elasticsearch.search.suggest.SuggestUtils;
 import org.elasticsearch.search.suggest.phrase.DirectCandidateGenerator.Candidate;
 import java.util.Arrays;
@ -73,7 +72,7 @@ public final class Correction implements Comparable<Correction> {
            len += toJoin[i].length;
        }
        result.grow(len);
-        return SuggestUtils.join(separator, result, toJoin);
+        return WordScorer.join(separator, result, toJoin);
    }
    /** Lower scores sorts first; if scores are equal,
--- a/core/src/main/java/org/elasticsearch/search/suggest/phrase/DirectCandidateGeneratorBuilder.java
+++ b/core/src/main/java/org/elasticsearch/search/suggest/phrase/DirectCandidateGeneratorBuilder.java
@ -449,7 +449,8 @@ public final class DirectCandidateGeneratorBuilder implements CandidateGenerator
            return new LuceneLevenshteinDistance();
        } else if ("levenstein".equals(distanceVal)) {
            return new LevensteinDistance();
-            //TODO Jaro and Winkler are 2 people - so apply same naming logic as damerau_levenshtein
+            // TODO Jaro and Winkler are 2 people - so apply same naming logic
            // as damerau_levenshtein
        } else if ("jarowinkler".equals(distanceVal)) {
            return new JaroWinklerDistance();
        } else if ("ngram".equals(distanceVal)) {
--- a/core/src/main/java/org/elasticsearch/search/suggest/phrase/LaplaceScorer.java
+++ b/core/src/main/java/org/elasticsearch/search/suggest/phrase/LaplaceScorer.java
@ -21,7 +21,6 @@ package org.elasticsearch.search.suggest.phrase;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.util.BytesRef;
 import org.elasticsearch.search.suggest.SuggestUtils;
 import org.elasticsearch.search.suggest.phrase.DirectCandidateGenerator.Candidate;
 import java.io.IOException;
@ -41,15 +40,15 @@ final class LaplaceScorer extends WordScorer {
    @Override
    protected double scoreBigram(Candidate word, Candidate w_1) throws IOException {
-        SuggestUtils.join(separator, spare, w_1.term, word.term);
+        join(separator, spare, w_1.term, word.term);
        return (alpha + frequency(spare.get())) / (alpha +  w_1.frequency + vocabluarySize);
    }
    @Override
    protected double scoreTrigram(Candidate word, Candidate w_1, Candidate w_2) throws IOException {
-        SuggestUtils.join(separator, spare, w_2.term, w_1.term, word.term);
+        join(separator, spare, w_2.term, w_1.term, word.term);
        long trigramCount = frequency(spare.get());
-        SuggestUtils.join(separator, spare, w_1.term, word.term);
+        join(separator, spare, w_1.term, word.term);
        return (alpha + trigramCount) / (alpha  +  frequency(spare.get()) + vocabluarySize);
    }
--- a/core/src/main/java/org/elasticsearch/search/suggest/phrase/LinearInterpolatingScorer.java
+++ b/core/src/main/java/org/elasticsearch/search/suggest/phrase/LinearInterpolatingScorer.java
@ -21,7 +21,6 @@ package org.elasticsearch.search.suggest.phrase;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.util.BytesRef;
 import org.elasticsearch.search.suggest.SuggestUtils;
 import org.elasticsearch.search.suggest.phrase.DirectCandidateGenerator.Candidate;
 import java.io.IOException;
@ -56,7 +55,7 @@ public final class LinearInterpolatingScorer extends WordScorer {
    @Override
    protected double scoreBigram(Candidate word, Candidate w_1) throws IOException {
-        SuggestUtils.join(separator, spare, w_1.term, word.term);
+        join(separator, spare, w_1.term, word.term);
        final long count = frequency(spare.get());
        if (count < 1) {
            return unigramLambda * scoreUnigram(word);
@ -66,12 +65,12 @@ public final class LinearInterpolatingScorer extends WordScorer {
    @Override
    protected double scoreTrigram(Candidate w, Candidate w_1, Candidate w_2) throws IOException {
-        SuggestUtils.join(separator, spare, w.term, w_1.term, w_2.term);
+        join(separator, spare, w.term, w_1.term, w_2.term);
        final long count = frequency(spare.get());
        if (count < 1) {
            return scoreBigram(w, w_1);
        }
-        SuggestUtils.join(separator, spare, w.term, w_1.term);
+        join(separator, spare, w.term, w_1.term);
        return trigramLambda * (count / (1.d + frequency(spare.get()))) + scoreBigram(w, w_1);
    }
--- a/core/src/main/java/org/elasticsearch/search/suggest/phrase/StupidBackoffScorer.java
+++ b/core/src/main/java/org/elasticsearch/search/suggest/phrase/StupidBackoffScorer.java
@ -21,7 +21,6 @@ package org.elasticsearch.search.suggest.phrase;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.util.BytesRef;
 import org.elasticsearch.search.suggest.SuggestUtils;
 import org.elasticsearch.search.suggest.phrase.DirectCandidateGenerator.Candidate;
 import java.io.IOException;
@ -41,7 +40,7 @@ class StupidBackoffScorer extends WordScorer {
    @Override
    protected double scoreBigram(Candidate word, Candidate w_1) throws IOException {
-        SuggestUtils.join(separator, spare, w_1.term, word.term);
+        join(separator, spare, w_1.term, word.term);
        final long count = frequency(spare.get());
        if (count < 1) {
            return discount * scoreUnigram(word);
@ -53,12 +52,12 @@ class StupidBackoffScorer extends WordScorer {
    protected double scoreTrigram(Candidate w, Candidate w_1, Candidate w_2) throws IOException {
        // First see if there are bigrams.  If there aren't then skip looking up the trigram.  This saves lookups
        // when the bigrams and trigrams are rare and we need both anyway.
-        SuggestUtils.join(separator, spare, w_1.term, w.term);
+        join(separator, spare, w_1.term, w.term);
        long bigramCount = frequency(spare.get());
        if (bigramCount < 1) {
            return discount * scoreUnigram(w);
        }
-        SuggestUtils.join(separator, spare, w_2.term, w_1.term, w.term);
+        join(separator, spare, w_2.term, w_1.term, w.term);
        long trigramCount = frequency(spare.get());
        if (trigramCount < 1) {
            return discount * (bigramCount / (w_1.frequency + 0.00000000001d));
--- a/core/src/main/java/org/elasticsearch/search/suggest/phrase/WordScorer.java
+++ b/core/src/main/java/org/elasticsearch/search/suggest/phrase/WordScorer.java
@ -100,6 +100,16 @@ public abstract class WordScorer {
       return scoreBigram(word, w_1);
   }
   /**
    * Concatenates {@code toJoin} into {@code result}, placing {@code separator}
    * between consecutive elements.
    *
    * <p>{@code result} is cleared first, so any previous content is discarded.
    * An empty {@code toJoin} yields an empty result instead of failing with an
    * {@link ArrayIndexOutOfBoundsException}.
    *
    * @param separator bytes inserted between two consecutive elements
    * @param result    builder that receives the joined bytes; it is cleared before use
    * @param toJoin    the elements to concatenate, in order; may be empty
    * @return the value of {@code result.get()} after all elements were appended
    *         (NOTE(review): per the Lucene API this reference is backed by the
    *         builder, so later writes to {@code result} may change it - confirm
    *         before retaining it)
    */
   public static BytesRef join(BytesRef separator, BytesRefBuilder result, BytesRef... toJoin) {
       result.clear();
       for (int i = 0; i < toJoin.length; i++) {
           // The separator goes in front of every element except the first, which
           // also makes the zero-element case fall through safely.
           if (i > 0) {
               result.append(separator);
           }
           result.append(toJoin[i]);
       }
       return result.get();
   }
   public interface WordScorerFactory {
       WordScorer newScorer(IndexReader reader, Terms terms,
                            String field, double realWordLikelyhood, BytesRef separator) throws IOException;