From 72d6d822ae1f630849baa4cc4c053f1e65af1e4d Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Tue, 6 Aug 2013 10:57:21 -0400 Subject: [PATCH] Add highlighting support for suggester. This commit adds general highlighting support to the suggest feature. The only implementation that implements this functionality at this point is the phrase suggester. The API supports a 'pre_tag' and a 'post_tag' that are used to wrap suggested parts of the given user input changed by the suggester. Closes #3442 --- .../elasticsearch/search/suggest/Suggest.java | 21 ++++- .../suggest/phrase/CandidateGenerator.java | 12 ++- .../search/suggest/phrase/Correction.java | 30 +++++-- .../phrase/DirectCandidateGenerator.java | 17 ++-- .../MultiCandidateGeneratorWrapper.java | 4 +- .../phrase/NoisyChannelSpellChecker.java | 2 +- .../suggest/phrase/PhraseSuggestParser.java | 23 +++++- .../suggest/phrase/PhraseSuggester.java | 9 ++- .../phrase/PhraseSuggestionBuilder.java | 21 +++++ .../phrase/PhraseSuggestionContext.java | 18 +++++ .../search/suggest/SuggestSearchTests.java | 19 ++++- .../phrase/NoisyChannelSpellCheckerTests.java | 81 ++++++++++++++----- 12 files changed, 212 insertions(+), 45 deletions(-) diff --git a/src/main/java/org/elasticsearch/search/suggest/Suggest.java b/src/main/java/org/elasticsearch/search/suggest/Suggest.java index c93fadbe390..a68d94f4d77 100644 --- a/src/main/java/org/elasticsearch/search/suggest/Suggest.java +++ b/src/main/java/org/elasticsearch/search/suggest/Suggest.java @@ -496,18 +496,25 @@ public class Suggest implements Iterable 0 && result.bytesEquals(candidate.term)) { BytesRef term = BytesRef.deepCopyOf(result); long freq = frequency(term); - candidates.add(new Candidate(BytesRef.deepCopyOf(term), freq, candidate.stringDistance, score(candidate.frequency, candidate.stringDistance, dictSize))); + candidates.add(new Candidate(BytesRef.deepCopyOf(term), freq, candidate.stringDistance, score(candidate.frequency, candidate.stringDistance, dictSize), false)); } else { - candidates.add(new Candidate(BytesRef.deepCopyOf(result), candidate.frequency, nonErrorLikelihood, score(candidate.frequency, candidate.stringDistance, dictSize))); + candidates.add(new Candidate(BytesRef.deepCopyOf(result), candidate.frequency, nonErrorLikelihood, score(candidate.frequency, candidate.stringDistance, dictSize), false)); } } }, spare); @@ -213,17 +213,20 @@ public final class DirectCandidateGenerator extends CandidateGenerator { public final double stringDistance; public final long frequency; public final double score; + public final boolean userInput; - public Candidate(BytesRef term, long frequency, double stringDistance, double score) { + public Candidate(BytesRef term, long frequency, double stringDistance, double score, boolean userInput) { this.frequency = frequency; this.term = term; this.stringDistance = stringDistance; this.score = score; + this.userInput = userInput; } @Override public String toString() { - return "Candidate [term=" + term.utf8ToString() + ", stringDistance=" + stringDistance + ", frequency=" + frequency + "]"; + return "Candidate [term=" + term.utf8ToString() + ", stringDistance=" + stringDistance + ", frequency=" + frequency + + (userInput ? ", userInput" : "" ) + "]"; } @Override @@ -253,8 +256,8 @@ public final class DirectCandidateGenerator extends CandidateGenerator { } @Override - public Candidate createCandidate(BytesRef term, long frequency, double channelScore) throws IOException { - return new Candidate(term, frequency, channelScore, score(frequency, channelScore, dictSize)); + public Candidate createCandidate(BytesRef term, long frequency, double channelScore, boolean userInput) throws IOException { + return new Candidate(term, frequency, channelScore, score(frequency, channelScore, dictSize), userInput); } } diff --git a/src/main/java/org/elasticsearch/search/suggest/phrase/MultiCandidateGeneratorWrapper.java b/src/main/java/org/elasticsearch/search/suggest/phrase/MultiCandidateGeneratorWrapper.java index 9aa985de103..e23d4e14fd5 100644 --- a/src/main/java/org/elasticsearch/search/suggest/phrase/MultiCandidateGeneratorWrapper.java +++ b/src/main/java/org/elasticsearch/search/suggest/phrase/MultiCandidateGeneratorWrapper.java @@ -72,8 +72,8 @@ public final class MultiCandidateGeneratorWrapper extends CandidateGenerator { return set; } @Override - public Candidate createCandidate(BytesRef term, long frequency, double channelScore) throws IOException { - return candidateGenerator[0].createCandidate(term, frequency, channelScore); + public Candidate createCandidate(BytesRef term, long frequency, double channelScore, boolean userInput) throws IOException { + return candidateGenerator[0].createCandidate(term, frequency, channelScore, userInput); } } diff --git a/src/main/java/org/elasticsearch/search/suggest/phrase/NoisyChannelSpellChecker.java b/src/main/java/org/elasticsearch/search/suggest/phrase/NoisyChannelSpellChecker.java index d693d7e2d0e..3a17b045ae0 100644 --- a/src/main/java/org/elasticsearch/search/suggest/phrase/NoisyChannelSpellChecker.java +++ b/src/main/java/org/elasticsearch/search/suggest/phrase/NoisyChannelSpellChecker.java @@ -93,7 +93,7 @@ public final class NoisyChannelSpellChecker { if (currentSet != null) { candidateSetsList.add(currentSet); } - currentSet = new CandidateSet(Candidate.EMPTY, generator.createCandidate(BytesRef.deepCopyOf(term))); + currentSet = new CandidateSet(Candidate.EMPTY, generator.createCandidate(BytesRef.deepCopyOf(term), true)); } } diff --git a/src/main/java/org/elasticsearch/search/suggest/phrase/PhraseSuggestParser.java b/src/main/java/org/elasticsearch/search/suggest/phrase/PhraseSuggestParser.java index 3b1f5afbd05..f87b696690f 100644 --- a/src/main/java/org/elasticsearch/search/suggest/phrase/PhraseSuggestParser.java +++ b/src/main/java/org/elasticsearch/search/suggest/phrase/PhraseSuggestParser.java @@ -105,8 +105,27 @@ public final class PhraseSuggestParser implements SuggestContextParser { } else { throw new ElasticSearchIllegalArgumentException("suggester[phrase] doesn't support array field [" + fieldName + "]"); } - } else if (token == Token.START_OBJECT && "smoothing".equals(fieldName)) { - parseSmoothingModel(parser, suggestion, fieldName); + } else if (token == Token.START_OBJECT) { + if ("smoothing".equals(fieldName)) { + parseSmoothingModel(parser, suggestion, fieldName); + } else if ("highlight".equals(fieldName)) { + while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { + if (token == XContentParser.Token.FIELD_NAME) { + fieldName = parser.currentName(); + } else if (token.isValue()) { + if ("pre_tag".equals(fieldName) || "preTag".equals(fieldName)) { + suggestion.setPreTag(parser.bytes()); + } else if ("post_tag".equals(fieldName) || "postTag".equals(fieldName)) { + suggestion.setPostTag(parser.bytes()); + } else { + throw new ElasticSearchIllegalArgumentException( + "suggester[phrase][highlight] doesn't support field [" + fieldName + "]"); + } + } + } + } else { + throw new ElasticSearchIllegalArgumentException("suggester[phrase] doesn't support array field [" + fieldName + "]"); + } } else { throw new ElasticSearchIllegalArgumentException("suggester[phrase] doesn't support field [" + fieldName + "]"); } diff --git a/src/main/java/org/elasticsearch/search/suggest/phrase/PhraseSuggester.java b/src/main/java/org/elasticsearch/search/suggest/phrase/PhraseSuggester.java index 6e8e5ae9a95..c337397bb46 100644 --- a/src/main/java/org/elasticsearch/search/suggest/phrase/PhraseSuggester.java +++ b/src/main/java/org/elasticsearch/search/suggest/phrase/PhraseSuggester.java @@ -73,9 +73,14 @@ public final class PhraseSuggester implements Suggester Suggestion.Entry