diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index b8a71911986..36224095a8c 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -256,6 +256,9 @@ Bug fixes * LUCENE-6677: QueryParserBase fails to enforce maxDeterminizedStates when creating a WildcardQuery (David Causse via Mike McCandless) +* LUCENE-6680: Preserve two suggestions that have same key and weight but + different payloads (Arcadius Ahouansou via Mike McCandless) + Changes in Runtime Behavior * LUCENE-6501: The subreader structure in ParallelCompositeReader diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggester.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggester.java index eb485815f91..9f4a997d80e 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggester.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggester.java @@ -324,9 +324,19 @@ public class BlendedInfixSuggester extends AnalyzingInfixSuggester { } // otherwise on alphabetic order - return CHARSEQUENCE_COMPARATOR.compare(o1.key, o2.key); + int keyCompare = CHARSEQUENCE_COMPARATOR.compare(o1.key, o2.key); + + if (keyCompare != 0) { + return keyCompare; + } + + // if same weight and title, use the payload if there is one + if (o1.payload != null) { + return o1.payload.compareTo(o2.payload); + } + + return 0; } } - } diff --git a/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggesterTest.java b/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggesterTest.java index cb22d36a8ac..87a77a3d0fa 100644 --- a/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggesterTest.java +++ b/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggesterTest.java @@ -22,6 +22,8 @@ import java.nio.file.Path; import java.util.List; import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.analysis.util.CharArraySet; import org.apache.lucene.search.suggest.Input; @@ -29,9 +31,11 @@ import org.apache.lucene.search.suggest.InputArrayIterator; import org.apache.lucene.search.suggest.Lookup; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util.TestUtil; public class BlendedInfixSuggesterTest extends LuceneTestCase { + /** * Test the weight transformation depending on the position * of the matching term. @@ -192,6 +196,65 @@ public class BlendedInfixSuggesterTest extends LuceneTestCase { suggester.close(); } + public void testBlendedInfixSuggesterDedupsOnWeightTitleAndPayload() throws Exception { + + //exactly same inputs + Input[] inputDocuments = new Input[] { + new Input("lend me your ear", 7, new BytesRef("uid1")), + new Input("lend me your ear", 7, new BytesRef("uid1")), + }; + duplicateCheck(inputDocuments, 1); + + // inputs differ on payload + inputDocuments = new Input[] { + new Input("lend me your ear", 7, new BytesRef("uid1")), + new Input("lend me your ear", 7, new BytesRef("uid2")), + }; + duplicateCheck(inputDocuments, 2); + + //exactly same input without payloads + inputDocuments = new Input[] { + new Input("lend me your ear", 7), + new Input("lend me your ear", 7), + }; + duplicateCheck(inputDocuments, 1); + + //Same input with first has payloads, second does not + inputDocuments = new Input[] { + new Input("lend me your ear", 7, new BytesRef("uid1")), + new Input("lend me your ear", 7), + }; + duplicateCheck(inputDocuments, 2); + + /**same input, first not having a payload, the second having payload + * we would expect 2 entries out but we are getting only 1 because + * the InputArrayIterator#hasPayloads() returns false because the first + * item has no payload, therefore, when ingested, none of the 2 input has payload and become 1 + */ + inputDocuments = new Input[] { + new Input("lend me your ear", 7), + new Input("lend me your ear", 7, new BytesRef("uid2")), + }; + List results = duplicateCheck(inputDocuments, 1); + assertNull(results.get(0).payload); + + + //exactly same inputs but different weight + inputDocuments = new Input[] { + new Input("lend me your ear", 1, new BytesRef("uid1")), + new Input("lend me your ear", 7, new BytesRef("uid1")), + }; + duplicateCheck(inputDocuments, 2); + + //exactly same inputs but different text + inputDocuments = new Input[] { + new Input("lend me your earings", 7, new BytesRef("uid1")), + new Input("lend me your ear", 7, new BytesRef("uid1")), + }; + duplicateCheck(inputDocuments, 2); + + } + public void /*testT*/rying() throws IOException { BytesRef lake = new BytesRef("lake"); @@ -236,4 +299,23 @@ public class BlendedInfixSuggesterTest extends LuceneTestCase { return -1; } + + private List duplicateCheck(Input[] inputs, int expectedSuggestionCount) throws IOException { + + Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false); + BlendedInfixSuggester suggester = new BlendedInfixSuggester(newDirectory(), a, a, AnalyzingInfixSuggester.DEFAULT_MIN_PREFIX_CHARS, + BlendedInfixSuggester.BlenderType.POSITION_RECIPROCAL,10, false); + + InputArrayIterator inputArrayIterator = new InputArrayIterator(inputs); + suggester.build(inputArrayIterator); + + List results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), 10, true, true); + assertEquals(expectedSuggestionCount, results.size()); + + suggester.close(); + a.close(); + + return results; + } + }