LUCENE-3846: compare UTF-8 bytes instead of converting to UTF-16 first

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3846@1400233 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Simon Willnauer 2012-10-19 19:07:27 +00:00
parent 83a1417bd5
commit 5334f00771
1 changed file with 5 additions and 9 deletions

View File

@ -472,7 +472,7 @@ public class AnalyzingSuggester extends Lookup {
assert num > 0; assert num > 0;
//System.out.println("lookup key=" + key + " num=" + num); //System.out.println("lookup key=" + key + " num=" + num);
final BytesRef utf8Key = new BytesRef(key);
try { try {
Automaton lookupAutomaton = toLookupAutomaton(key); Automaton lookupAutomaton = toLookupAutomaton(key);
@ -538,9 +538,9 @@ public class AnalyzingSuggester extends Lookup {
// nodes we have and the // nodes we have and the
// maxSurfaceFormsPerAnalyzedForm: // maxSurfaceFormsPerAnalyzedForm:
for(MinResult<Pair<Long,BytesRef>> completion : completions) { for(MinResult<Pair<Long,BytesRef>> completion : completions) {
spare.grow(completion.output.output2.length); if (utf8Key.bytesEquals(completion.output.output2)) {
UnicodeUtil.UTF8toUTF16(completion.output.output2, spare); spare.grow(completion.output.output2.length);
if (CHARSEQUENCE_COMPARATOR.compare(spare, key) == 0) { UnicodeUtil.UTF8toUTF16(completion.output.output2, spare);
results.add(new LookupResult(spare.toString(), decodeWeight(completion.output.output1))); results.add(new LookupResult(spare.toString(), decodeWeight(completion.output.output1)));
break; break;
} }
@ -574,16 +574,12 @@ public class AnalyzingSuggester extends Lookup {
// In exactFirst mode, don't accept any paths // In exactFirst mode, don't accept any paths
// matching the surface form since that will // matching the surface form since that will
// create duplicate results: // create duplicate results:
spare.grow(output.output2.length); return !utf8Key.bytesEquals(output.output2);
UnicodeUtil.UTF8toUTF16(output.output2, spare);
return CHARSEQUENCE_COMPARATOR.compare(spare, key) != 0;
} }
} }
}; };
final List<FSTUtil.Path<Pair<Long,BytesRef>>> prefixPaths = intersector.intersectAll(); final List<FSTUtil.Path<Pair<Long,BytesRef>>> prefixPaths = intersector.intersectAll();
// System.out.println(key);
for (FSTUtil.Path<Pair<Long,BytesRef>> path : prefixPaths) { for (FSTUtil.Path<Pair<Long,BytesRef>> path : prefixPaths) {
// System.out.println(UnicodeUtil.newString(path.input.ints, path.input.offset, path.input.length));
searcher.addStartPaths(path.fstNode, path.output, true, path.input); searcher.addStartPaths(path.fstNode, path.output, true, path.input);
} }