From 5334f007713b9a0a8baf39304d8ca28cfc615d54 Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Fri, 19 Oct 2012 19:07:27 +0000 Subject: [PATCH] LUCENE-3846: compare UTF-8 bytes instead of converting to UTF-16 first git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3846@1400233 13f79535-47bb-0310-9956-ffa450edef68 --- .../suggest/analyzing/AnalyzingSuggester.java | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java index 06e21e976a6..0245d93b2de 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java @@ -472,7 +472,7 @@ public class AnalyzingSuggester extends Lookup { assert num > 0; //System.out.println("lookup key=" + key + " num=" + num); - + final BytesRef utf8Key = new BytesRef(key); try { Automaton lookupAutomaton = toLookupAutomaton(key); @@ -538,9 +538,9 @@ public class AnalyzingSuggester extends Lookup { // nodes we have and the // maxSurfaceFormsPerAnalyzedForm: for(MinResult> completion : completions) { - spare.grow(completion.output.output2.length); - UnicodeUtil.UTF8toUTF16(completion.output.output2, spare); - if (CHARSEQUENCE_COMPARATOR.compare(spare, key) == 0) { + if (utf8Key.bytesEquals(completion.output.output2)) { + spare.grow(completion.output.output2.length); + UnicodeUtil.UTF8toUTF16(completion.output.output2, spare); results.add(new LookupResult(spare.toString(), decodeWeight(completion.output.output1))); break; } @@ -574,16 +574,12 @@ public class AnalyzingSuggester extends Lookup { // In exactFirst mode, don't accept any paths // matching the surface form since that will // create duplicate results: - spare.grow(output.output2.length); - UnicodeUtil.UTF8toUTF16(output.output2, spare); - return CHARSEQUENCE_COMPARATOR.compare(spare, key) != 0; + return !utf8Key.bytesEquals(output.output2); } } }; final List>> prefixPaths = intersector.intersectAll(); -// System.out.println(key); for (FSTUtil.Path> path : prefixPaths) { -// System.out.println(UnicodeUtil.newString(path.input.ints, path.input.offset, path.input.length)); searcher.addStartPaths(path.fstNode, path.output, true, path.input); }