Add highlighting support for suggester.
This commit adds general highlighting support to the suggest feature. At this point, the phrase suggester is the only suggester that implements it. The API accepts a 'pre_tag' and a 'post_tag', which are used to wrap the parts of the user's input that the suggester changed. Closes #3442
This commit is contained in:
parent
a938bd57a9
commit
72d6d822ae
|
@ -496,18 +496,25 @@ public class Suggest implements Iterable<Suggest.Suggestion<? extends Entry<? ex
|
|||
static class Fields {
|
||||
|
||||
static final XContentBuilderString TEXT = new XContentBuilderString("text");
|
||||
static final XContentBuilderString HIGHLIGHTED = new XContentBuilderString("highlighted");
|
||||
static final XContentBuilderString SCORE = new XContentBuilderString("score");
|
||||
|
||||
}
|
||||
|
||||
private Text text;
|
||||
private Text highlighted;
|
||||
private float score;
|
||||
|
||||
public Option(Text text, float score) {
|
||||
public Option(Text text, Text highlighted, float score) {
|
||||
this.text = text;
|
||||
this.highlighted = highlighted;
|
||||
this.score = score;
|
||||
}
|
||||
|
||||
public Option(Text text, float score) {
|
||||
this(text, null, score);
|
||||
}
|
||||
|
||||
public Option() {
|
||||
}
|
||||
|
||||
|
@ -518,6 +525,13 @@ public class Suggest implements Iterable<Suggest.Suggestion<? extends Entry<? ex
|
|||
return text;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return Copy of the suggested text with the changes from the user-supplied text highlighted, or null if no highlighted text was set.
|
||||
*/
|
||||
public Text getHighlighted() {
|
||||
return highlighted;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return The score based on the edit distance difference between the suggested term and the
|
||||
* term in the suggest text.
|
||||
|
@ -533,12 +547,14 @@ public class Suggest implements Iterable<Suggest.Suggestion<? extends Entry<? ex
|
|||
@Override
|
||||
public void readFrom(StreamInput in) throws IOException {
|
||||
text = in.readText();
|
||||
highlighted = in.readOptionalText();
|
||||
score = in.readFloat();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void writeTo(StreamOutput out) throws IOException {
|
||||
out.writeText(text);
|
||||
out.writeOptionalText(highlighted);
|
||||
out.writeFloat(score);
|
||||
}
|
||||
|
||||
|
@ -552,6 +568,9 @@ public class Suggest implements Iterable<Suggest.Suggestion<? extends Entry<? ex
|
|||
|
||||
protected XContentBuilder innerToXContent(XContentBuilder builder, Params params) throws IOException {
|
||||
builder.field(Fields.TEXT, text);
|
||||
if (highlighted != null) {
|
||||
builder.field(Fields.HIGHLIGHTED, highlighted);
|
||||
}
|
||||
builder.field(Fields.SCORE, score);
|
||||
return builder;
|
||||
}
|
||||
|
|
|
@ -32,14 +32,18 @@ public abstract class CandidateGenerator {
|
|||
public abstract long frequency(BytesRef term) throws IOException;
|
||||
|
||||
public CandidateSet drawCandidates(BytesRef term) throws IOException {
|
||||
CandidateSet set = new CandidateSet(Candidate.EMPTY, createCandidate(term));
|
||||
CandidateSet set = new CandidateSet(Candidate.EMPTY, createCandidate(term, true));
|
||||
return drawCandidates(set);
|
||||
}
|
||||
|
||||
public Candidate createCandidate(BytesRef term) throws IOException {
|
||||
return createCandidate(term, frequency(term), 1.0);
|
||||
public Candidate createCandidate(BytesRef term, boolean userInput) throws IOException {
|
||||
return createCandidate(term, frequency(term), 1.0, userInput);
|
||||
}
|
||||
public abstract Candidate createCandidate(BytesRef term, long frequency, double channelScore) throws IOException;
|
||||
public Candidate createCandidate(BytesRef term, long frequency, double channelScore) throws IOException {
|
||||
return createCandidate(term, frequency, channelScore, false);
|
||||
}
|
||||
|
||||
public abstract Candidate createCandidate(BytesRef term, long frequency, double channelScore, boolean userInput) throws IOException;
|
||||
|
||||
public abstract CandidateSet drawCandidates(CandidateSet set) throws IOException;
|
||||
|
||||
|
|
|
@ -18,11 +18,11 @@
|
|||
*/
|
||||
package org.elasticsearch.search.suggest.phrase;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.elasticsearch.search.suggest.SuggestUtils;
|
||||
import org.elasticsearch.search.suggest.phrase.DirectCandidateGenerator.Candidate;
|
||||
|
||||
import java.util.Arrays;
|
||||
//TODO public for tests
|
||||
public final class Correction {
|
||||
|
||||
|
@ -41,15 +41,33 @@ public final class Correction {
|
|||
}
|
||||
|
||||
public BytesRef join(BytesRef separator) {
|
||||
return join(separator, new BytesRef());
|
||||
return join(separator, null, null);
|
||||
}
|
||||
|
||||
public BytesRef join(BytesRef separator, BytesRef result) {
|
||||
public BytesRef join(BytesRef separator, BytesRef preTag, BytesRef postTag) {
|
||||
return join(separator, new BytesRef(), preTag, postTag);
|
||||
}
|
||||
|
||||
public BytesRef join(BytesRef separator, BytesRef result, BytesRef preTag, BytesRef postTag) {
|
||||
BytesRef[] toJoin = new BytesRef[this.candidates.length];
|
||||
int len = separator.length * this.candidates.length - 1;
|
||||
for (int i = 0; i < toJoin.length; i++) {
|
||||
toJoin[i] = candidates[i].term;
|
||||
len += toJoin[i].length;
|
||||
Candidate candidate = candidates[i];
|
||||
if (preTag == null || candidate.userInput) {
|
||||
toJoin[i] = candidate.term;
|
||||
} else {
|
||||
final int maxLen = preTag.length + postTag.length + candidate.term.length;
|
||||
final BytesRef highlighted = new BytesRef(maxLen);// just allocate once
|
||||
if (i == 0 || candidates[i-1].userInput) {
|
||||
highlighted.append(preTag);
|
||||
}
|
||||
highlighted.append(candidate.term);
|
||||
if (toJoin.length == i + 1 || candidates[i+1].userInput) {
|
||||
highlighted.append(postTag);
|
||||
}
|
||||
toJoin[i] = highlighted;
|
||||
}
|
||||
len += toJoin[i].length;
|
||||
}
|
||||
result.offset = 0;
|
||||
result.grow(len);
|
||||
|
|
|
@ -126,7 +126,7 @@ public final class DirectCandidateGenerator extends CandidateGenerator {
|
|||
for (int i = 0; i < suggestSimilar.length; i++) {
|
||||
SuggestWord suggestWord = suggestSimilar[i];
|
||||
BytesRef candidate = new BytesRef(suggestWord.string);
|
||||
postFilter(new Candidate(candidate, internalFrequency(candidate), suggestWord.score, score(suggestWord.freq, suggestWord.score, dictSize)), spare, byteSpare, candidates);
|
||||
postFilter(new Candidate(candidate, internalFrequency(candidate), suggestWord.score, score(suggestWord.freq, suggestWord.score, dictSize), false), spare, byteSpare, candidates);
|
||||
}
|
||||
set.addCandidates(candidates);
|
||||
return set;
|
||||
|
@ -160,9 +160,9 @@ public final class DirectCandidateGenerator extends CandidateGenerator {
|
|||
if (posIncAttr.getPositionIncrement() > 0 && result.bytesEquals(candidate.term)) {
|
||||
BytesRef term = BytesRef.deepCopyOf(result);
|
||||
long freq = frequency(term);
|
||||
candidates.add(new Candidate(BytesRef.deepCopyOf(term), freq, candidate.stringDistance, score(candidate.frequency, candidate.stringDistance, dictSize)));
|
||||
candidates.add(new Candidate(BytesRef.deepCopyOf(term), freq, candidate.stringDistance, score(candidate.frequency, candidate.stringDistance, dictSize), false));
|
||||
} else {
|
||||
candidates.add(new Candidate(BytesRef.deepCopyOf(result), candidate.frequency, nonErrorLikelihood, score(candidate.frequency, candidate.stringDistance, dictSize)));
|
||||
candidates.add(new Candidate(BytesRef.deepCopyOf(result), candidate.frequency, nonErrorLikelihood, score(candidate.frequency, candidate.stringDistance, dictSize), false));
|
||||
}
|
||||
}
|
||||
}, spare);
|
||||
|
@ -213,17 +213,20 @@ public final class DirectCandidateGenerator extends CandidateGenerator {
|
|||
public final double stringDistance;
|
||||
public final long frequency;
|
||||
public final double score;
|
||||
public final boolean userInput;
|
||||
|
||||
public Candidate(BytesRef term, long frequency, double stringDistance, double score) {
|
||||
public Candidate(BytesRef term, long frequency, double stringDistance, double score, boolean userInput) {
|
||||
this.frequency = frequency;
|
||||
this.term = term;
|
||||
this.stringDistance = stringDistance;
|
||||
this.score = score;
|
||||
this.userInput = userInput;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "Candidate [term=" + term.utf8ToString() + ", stringDistance=" + stringDistance + ", frequency=" + frequency + "]";
|
||||
return "Candidate [term=" + term.utf8ToString() + ", stringDistance=" + stringDistance + ", frequency=" + frequency +
|
||||
(userInput ? ", userInput" : "" ) + "]";
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -253,8 +256,8 @@ public final class DirectCandidateGenerator extends CandidateGenerator {
|
|||
}
|
||||
|
||||
@Override
|
||||
public Candidate createCandidate(BytesRef term, long frequency, double channelScore) throws IOException {
|
||||
return new Candidate(term, frequency, channelScore, score(frequency, channelScore, dictSize));
|
||||
public Candidate createCandidate(BytesRef term, long frequency, double channelScore, boolean userInput) throws IOException {
|
||||
return new Candidate(term, frequency, channelScore, score(frequency, channelScore, dictSize), userInput);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -72,8 +72,8 @@ public final class MultiCandidateGeneratorWrapper extends CandidateGenerator {
|
|||
return set;
|
||||
}
|
||||
@Override
|
||||
public Candidate createCandidate(BytesRef term, long frequency, double channelScore) throws IOException {
|
||||
return candidateGenerator[0].createCandidate(term, frequency, channelScore);
|
||||
public Candidate createCandidate(BytesRef term, long frequency, double channelScore, boolean userInput) throws IOException {
|
||||
return candidateGenerator[0].createCandidate(term, frequency, channelScore, userInput);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -93,7 +93,7 @@ public final class NoisyChannelSpellChecker {
|
|||
if (currentSet != null) {
|
||||
candidateSetsList.add(currentSet);
|
||||
}
|
||||
currentSet = new CandidateSet(Candidate.EMPTY, generator.createCandidate(BytesRef.deepCopyOf(term)));
|
||||
currentSet = new CandidateSet(Candidate.EMPTY, generator.createCandidate(BytesRef.deepCopyOf(term), true));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -105,8 +105,27 @@ public final class PhraseSuggestParser implements SuggestContextParser {
|
|||
} else {
|
||||
throw new ElasticSearchIllegalArgumentException("suggester[phrase] doesn't support array field [" + fieldName + "]");
|
||||
}
|
||||
} else if (token == Token.START_OBJECT && "smoothing".equals(fieldName)) {
|
||||
parseSmoothingModel(parser, suggestion, fieldName);
|
||||
} else if (token == Token.START_OBJECT) {
|
||||
if ("smoothing".equals(fieldName)) {
|
||||
parseSmoothingModel(parser, suggestion, fieldName);
|
||||
} else if ("highlight".equals(fieldName)) {
|
||||
while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
|
||||
if (token == XContentParser.Token.FIELD_NAME) {
|
||||
fieldName = parser.currentName();
|
||||
} else if (token.isValue()) {
|
||||
if ("pre_tag".equals(fieldName) || "preTag".equals(fieldName)) {
|
||||
suggestion.setPreTag(parser.bytes());
|
||||
} else if ("post_tag".equals(fieldName) || "postTag".equals(fieldName)) {
|
||||
suggestion.setPostTag(parser.bytes());
|
||||
} else {
|
||||
throw new ElasticSearchIllegalArgumentException(
|
||||
"suggester[phrase][highlight] doesn't support field [" + fieldName + "]");
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
throw new ElasticSearchIllegalArgumentException("suggester[phrase] doesn't support array field [" + fieldName + "]");
|
||||
}
|
||||
} else {
|
||||
throw new ElasticSearchIllegalArgumentException("suggester[phrase] doesn't support field [" + fieldName + "]");
|
||||
}
|
||||
|
|
|
@ -73,9 +73,14 @@ public final class PhraseSuggester implements Suggester<PhraseSuggestionContext>
|
|||
Suggestion.Entry<Option> resultEntry = new Suggestion.Entry<Option>(new StringText(spare.toString()), 0, spare.length);
|
||||
BytesRef byteSpare = new BytesRef();
|
||||
for (Correction correction : corrections) {
|
||||
UnicodeUtil.UTF8toUTF16(correction.join(SEPARATOR, byteSpare), spare);
|
||||
UnicodeUtil.UTF8toUTF16(correction.join(SEPARATOR, byteSpare, null, null), spare);
|
||||
Text phrase = new StringText(spare.toString());
|
||||
resultEntry.addOption(new Suggestion.Entry.Option(phrase, (float) (correction.score)));
|
||||
Text highlighted = null;
|
||||
if (suggestion.getPreTag() != null) {
|
||||
UnicodeUtil.UTF8toUTF16(correction.join(SEPARATOR, byteSpare, suggestion.getPreTag(), suggestion.getPostTag()), spare);
|
||||
highlighted = new StringText(spare.toString());
|
||||
}
|
||||
resultEntry.addOption(new Suggestion.Entry.Option(phrase, highlighted, (float) (correction.score)));
|
||||
}
|
||||
final Suggestion<Entry<Option>> response = new Suggestion<Entry<Option>>(name, suggestion.getSize());
|
||||
response.addTerm(resultEntry);
|
||||
|
|
|
@ -44,6 +44,8 @@ public final class PhraseSuggestionBuilder extends SuggestionBuilder<PhraseSugge
|
|||
private SmoothingModel model;
|
||||
private Boolean forceUnigrams;
|
||||
private Integer tokenLimit;
|
||||
private String preTag;
|
||||
private String postTag;
|
||||
|
||||
public PhraseSuggestionBuilder(String name) {
|
||||
super(name, "phrase");
|
||||
|
@ -147,6 +149,19 @@ public final class PhraseSuggestionBuilder extends SuggestionBuilder<PhraseSugge
|
|||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set up highlighting for suggestions. If this is called, a "highlighted" field
|
||||
* is returned with each suggestion, wrapping changed tokens in preTag and postTag.
|
||||
*/
|
||||
public PhraseSuggestionBuilder highlight(String preTag, String postTag) {
|
||||
if (preTag == null || postTag == null) {
|
||||
throw new ElasticSearchIllegalArgumentException("Pre and post tag must not be null.");
|
||||
}
|
||||
this.preTag = preTag;
|
||||
this.postTag = postTag;
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public XContentBuilder innerToXContent(XContentBuilder builder, Params params) throws IOException {
|
||||
if (realWordErrorLikelihood != null) {
|
||||
|
@ -185,6 +200,12 @@ public final class PhraseSuggestionBuilder extends SuggestionBuilder<PhraseSugge
|
|||
model.toXContent(builder, params);
|
||||
builder.endObject();
|
||||
}
|
||||
if (preTag != null) {
|
||||
builder.startObject("highlight");
|
||||
builder.field("pre_tag", preTag);
|
||||
builder.field("post_tag", postTag);
|
||||
builder.endObject();
|
||||
}
|
||||
return builder;
|
||||
}
|
||||
|
||||
|
|
|
@ -38,6 +38,8 @@ class PhraseSuggestionContext extends SuggestionContext {
|
|||
private int gramSize = 1;
|
||||
private float confidence = 1.0f;
|
||||
private int tokenLimit = NoisyChannelSpellChecker.DEFAULT_TOKEN_LIMIT;
|
||||
private BytesRef preTag;
|
||||
private BytesRef postTag;
|
||||
|
||||
private WordScorer.WordScorerFactory scorer;
|
||||
|
||||
|
@ -162,4 +164,20 @@ class PhraseSuggestionContext extends SuggestionContext {
|
|||
public int getTokenLimit() {
|
||||
return tokenLimit;
|
||||
}
|
||||
|
||||
public void setPreTag(BytesRef preTag) {
|
||||
this.preTag = preTag;
|
||||
}
|
||||
|
||||
public BytesRef getPreTag() {
|
||||
return preTag;
|
||||
}
|
||||
|
||||
public void setPostTag(BytesRef postTag) {
|
||||
this.postTag = postTag;
|
||||
}
|
||||
|
||||
public BytesRef getPostTag() {
|
||||
return postTag;
|
||||
}
|
||||
}
|
|
@ -50,7 +50,6 @@ import static org.elasticsearch.index.query.QueryBuilders.matchQuery;
|
|||
import static org.elasticsearch.search.suggest.SuggestBuilder.phraseSuggestion;
|
||||
import static org.elasticsearch.search.suggest.SuggestBuilder.termSuggestion;
|
||||
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSuggestionSize;
|
||||
import static org.hamcrest.MatcherAssert.assertThat;
|
||||
import static org.hamcrest.Matchers.*;
|
||||
|
||||
/**
|
||||
|
@ -592,6 +591,24 @@ public class SuggestSearchTests extends AbstractSharedClusterTest {
|
|||
assertThat(searchSuggest.getSuggestion("simple_phrase").getEntries().get(0).getText().string(), equalTo("Xor the Got-Jewel"));
|
||||
assertThat(searchSuggest.getSuggestion("simple_phrase").getEntries().get(0).getOptions().get(0).getText().string(), equalTo("xorr the god jewel"));
|
||||
|
||||
// Ask for highlighting
|
||||
searchSuggest = searchSuggest(client(), "Xor the Got-Jewel",
|
||||
phraseSuggestion("simple_phrase").
|
||||
realWordErrorLikelihood(0.95f).field("bigram").gramSize(2).analyzer("body")
|
||||
.addCandidateGenerator(PhraseSuggestionBuilder.candidateGenerator("body").minWordLength(1).suggestMode("always"))
|
||||
.maxErrors(0.5f)
|
||||
.size(1)
|
||||
.highlight("<em>", "</em>"));
|
||||
|
||||
assertThat(searchSuggest, notNullValue());
|
||||
assertThat(searchSuggest.size(), equalTo(1));
|
||||
assertThat(searchSuggest.getSuggestion("simple_phrase").getName(), equalTo("simple_phrase"));
|
||||
assertThat(searchSuggest.getSuggestion("simple_phrase").getEntries().size(), equalTo(1));
|
||||
assertThat(searchSuggest.getSuggestion("simple_phrase").getEntries().get(0).getOptions().size(), equalTo(1));
|
||||
assertThat(searchSuggest.getSuggestion("simple_phrase").getEntries().get(0).getText().string(), equalTo("Xor the Got-Jewel"));
|
||||
assertThat(searchSuggest.getSuggestion("simple_phrase").getEntries().get(0).getOptions().get(0).getText().string(), equalTo("xorr the god jewel"));
|
||||
assertThat(searchSuggest.getSuggestion("simple_phrase").getEntries().get(0).getOptions().get(0).getHighlighted().string(), equalTo("<em>xorr</em> the <em>god</em> jewel"));
|
||||
|
||||
|
||||
// pass in a correct phrase
|
||||
searchSuggest = searchSuggest(client(), "Xorr the God-Jewel",
|
||||
|
|
|
@ -17,7 +17,16 @@ package org.elasticsearch.test.unit.search.suggest.phrase;
|
|||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
import com.google.common.base.Charsets;
|
||||
import static org.hamcrest.Matchers.equalTo;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStreamReader;
|
||||
import java.io.Reader;
|
||||
import java.io.StringReader;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.TokenFilter;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
|
@ -40,16 +49,23 @@ import org.apache.lucene.search.spell.SuggestMode;
|
|||
import org.apache.lucene.store.RAMDirectory;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.Version;
|
||||
import org.elasticsearch.search.suggest.phrase.*;
|
||||
import org.elasticsearch.search.suggest.phrase.CandidateGenerator;
|
||||
import org.elasticsearch.search.suggest.phrase.Correction;
|
||||
import org.elasticsearch.search.suggest.phrase.DirectCandidateGenerator;
|
||||
import org.elasticsearch.search.suggest.phrase.LaplaceScorer;
|
||||
import org.elasticsearch.search.suggest.phrase.LinearInterpoatingScorer;
|
||||
import org.elasticsearch.search.suggest.phrase.MultiCandidateGeneratorWrapper;
|
||||
import org.elasticsearch.search.suggest.phrase.NoisyChannelSpellChecker;
|
||||
import org.elasticsearch.search.suggest.phrase.StupidBackoffScorer;
|
||||
import org.elasticsearch.search.suggest.phrase.WordScorer;
|
||||
import org.elasticsearch.test.integration.ElasticsearchTestCase;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.io.*;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import static org.hamcrest.Matchers.equalTo;
|
||||
import com.google.common.base.Charsets;
|
||||
public class NoisyChannelSpellCheckerTests extends ElasticsearchTestCase{
|
||||
private final BytesRef space = new BytesRef(" ");
|
||||
private final BytesRef preTag = new BytesRef("<em>");
|
||||
private final BytesRef postTag = new BytesRef("</em>");
|
||||
|
||||
@Test
|
||||
public void testMarvelHeros() throws IOException {
|
||||
|
@ -98,28 +114,47 @@ public class NoisyChannelSpellCheckerTests extends ElasticsearchTestCase{
|
|||
DirectCandidateGenerator generator = new DirectCandidateGenerator(spellchecker, "body", SuggestMode.SUGGEST_MORE_POPULAR, ir, 0.95, 5);
|
||||
Correction[] corrections = suggester.getCorrections(wrapper, new BytesRef("american ame"), generator, 1, 1, ir, "body", wordScorer, 1, 2);
|
||||
assertThat(corrections.length, equalTo(1));
|
||||
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("american ace"));
|
||||
assertThat(corrections[0].join(space).utf8ToString(), equalTo("american ace"));
|
||||
assertThat(corrections[0].join(space, preTag, postTag).utf8ToString(), equalTo("american <em>ace</em>"));
|
||||
|
||||
corrections = suggester.getCorrections(wrapper, new BytesRef("american ame"), generator, 1, 1, ir, "body", wordScorer, 0, 1);
|
||||
assertThat(corrections.length, equalTo(1));
|
||||
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("american ame"));
|
||||
|
||||
assertThat(corrections[0].join(space).utf8ToString(), equalTo("american ame"));
|
||||
assertThat(corrections[0].join(space, preTag, postTag).utf8ToString(), equalTo("american ame"));
|
||||
|
||||
suggester = new NoisyChannelSpellChecker(0.85);
|
||||
wordScorer = new LaplaceScorer(ir, "body_ngram", 0.85d, new BytesRef(" "), 0.5f);
|
||||
corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 0.5f, 4, ir, "body", wordScorer, 0, 2);
|
||||
assertThat(corrections.length, equalTo(4));
|
||||
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the god jewel"));
|
||||
assertThat(corrections[1].join(new BytesRef(" ")).utf8ToString(), equalTo("xor the god jewel"));
|
||||
assertThat(corrections[2].join(new BytesRef(" ")).utf8ToString(), equalTo("xorn the god jewel"));
|
||||
assertThat(corrections[3].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the got jewel"));
|
||||
assertThat(corrections[0].join(space).utf8ToString(), equalTo("xorr the god jewel"));
|
||||
assertThat(corrections[1].join(space).utf8ToString(), equalTo("xor the god jewel"));
|
||||
assertThat(corrections[2].join(space).utf8ToString(), equalTo("xorn the god jewel"));
|
||||
assertThat(corrections[3].join(space).utf8ToString(), equalTo("xorr the got jewel"));
|
||||
assertThat(corrections[0].join(space, preTag, postTag).utf8ToString(), equalTo("<em>xorr</em> the <em>god</em> jewel"));
|
||||
assertThat(corrections[1].join(space, preTag, postTag).utf8ToString(), equalTo("xor the <em>god</em> jewel"));
|
||||
assertThat(corrections[2].join(space, preTag, postTag).utf8ToString(), equalTo("<em>xorn</em> the <em>god</em> jewel"));
|
||||
assertThat(corrections[3].join(space, preTag, postTag).utf8ToString(), equalTo("<em>xorr</em> the got jewel"));
|
||||
|
||||
corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 0.5f, 4, ir, "body", wordScorer, 1, 2);
|
||||
assertThat(corrections.length, equalTo(4));
|
||||
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the god jewel"));
|
||||
assertThat(corrections[1].join(new BytesRef(" ")).utf8ToString(), equalTo("xor the god jewel"));
|
||||
assertThat(corrections[2].join(new BytesRef(" ")).utf8ToString(), equalTo("xorn the god jewel"));
|
||||
assertThat(corrections[3].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the got jewel"));
|
||||
assertThat(corrections[0].join(space).utf8ToString(), equalTo("xorr the god jewel"));
|
||||
assertThat(corrections[1].join(space).utf8ToString(), equalTo("xor the god jewel"));
|
||||
assertThat(corrections[2].join(space).utf8ToString(), equalTo("xorn the god jewel"));
|
||||
assertThat(corrections[3].join(space).utf8ToString(), equalTo("xorr the got jewel"));
|
||||
|
||||
// Test some of the highlighting corner cases
|
||||
suggester = new NoisyChannelSpellChecker(0.85);
|
||||
wordScorer = new LaplaceScorer(ir, "body_ngram", 0.85d, new BytesRef(" "), 0.5f);
|
||||
corrections = suggester.getCorrections(wrapper, new BytesRef("Xor teh Got-Jewel"), generator, 4f, 4, ir, "body", wordScorer, 1, 2);
|
||||
assertThat(corrections.length, equalTo(4));
|
||||
assertThat(corrections[0].join(space).utf8ToString(), equalTo("xorr the god jewel"));
|
||||
assertThat(corrections[1].join(space).utf8ToString(), equalTo("xor the god jewel"));
|
||||
assertThat(corrections[2].join(space).utf8ToString(), equalTo("xorn the god jewel"));
|
||||
assertThat(corrections[3].join(space).utf8ToString(), equalTo("xor teh god jewel"));
|
||||
assertThat(corrections[0].join(space, preTag, postTag).utf8ToString(), equalTo("<em>xorr the god</em> jewel"));
|
||||
assertThat(corrections[1].join(space, preTag, postTag).utf8ToString(), equalTo("xor <em>the god</em> jewel"));
|
||||
assertThat(corrections[2].join(space, preTag, postTag).utf8ToString(), equalTo("<em>xorn the god</em> jewel"));
|
||||
assertThat(corrections[3].join(space, preTag, postTag).utf8ToString(), equalTo("xor teh <em>god</em> jewel"));
|
||||
|
||||
// test synonyms
|
||||
|
||||
|
@ -146,11 +181,19 @@ public class NoisyChannelSpellCheckerTests extends ElasticsearchTestCase{
|
|||
suggester = new NoisyChannelSpellChecker(0.85);
|
||||
wordScorer = new LaplaceScorer(ir, "body_ngram", 0.85d, new BytesRef(" "), 0.5f);
|
||||
corrections = suggester.getCorrections(analyzer, new BytesRef("captian usa"), generator, 2, 4, ir, "body", wordScorer, 1, 2);
|
||||
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("captain america"));
|
||||
assertThat(corrections[0].join(space).utf8ToString(), equalTo("captain america"));
|
||||
assertThat(corrections[0].join(space, preTag, postTag).utf8ToString(), equalTo("<em>captain america</em>"));
|
||||
|
||||
generator = new DirectCandidateGenerator(spellchecker, "body", SuggestMode.SUGGEST_MORE_POPULAR, ir, 0.85, 10, null, analyzer);
|
||||
corrections = suggester.getCorrections(analyzer, new BytesRef("captian usw"), generator, 2, 4, ir, "body", wordScorer, 1, 2);
|
||||
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("captain america"));
|
||||
assertThat(corrections[0].join(space, preTag, postTag).utf8ToString(), equalTo("<em>captain america</em>"));
|
||||
|
||||
// Make sure that user supplied text is not marked as highlighted in the presence of a synonym filter
|
||||
generator = new DirectCandidateGenerator(spellchecker, "body", SuggestMode.SUGGEST_MORE_POPULAR, ir, 0.85, 10, null, analyzer);
|
||||
corrections = suggester.getCorrections(analyzer, new BytesRef("captain usw"), generator, 2, 4, ir, "body", wordScorer, 1, 2);
|
||||
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("captain america"));
|
||||
assertThat(corrections[0].join(space, preTag, postTag).utf8ToString(), equalTo("captain <em>america</em>"));
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
Loading…
Reference in New Issue