mirror of https://github.com/apache/lucene.git
LUCENE-3846: compare UTF-8 bytes instead of converting to UTF-16 first
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3846@1400233 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
83a1417bd5
commit
5334f00771
|
@ -472,7 +472,7 @@ public class AnalyzingSuggester extends Lookup {
|
||||||
assert num > 0;
|
assert num > 0;
|
||||||
|
|
||||||
//System.out.println("lookup key=" + key + " num=" + num);
|
//System.out.println("lookup key=" + key + " num=" + num);
|
||||||
|
final BytesRef utf8Key = new BytesRef(key);
|
||||||
try {
|
try {
|
||||||
|
|
||||||
Automaton lookupAutomaton = toLookupAutomaton(key);
|
Automaton lookupAutomaton = toLookupAutomaton(key);
|
||||||
|
@ -538,9 +538,9 @@ public class AnalyzingSuggester extends Lookup {
|
||||||
// nodes we have and the
|
// nodes we have and the
|
||||||
// maxSurfaceFormsPerAnalyzedForm:
|
// maxSurfaceFormsPerAnalyzedForm:
|
||||||
for(MinResult<Pair<Long,BytesRef>> completion : completions) {
|
for(MinResult<Pair<Long,BytesRef>> completion : completions) {
|
||||||
spare.grow(completion.output.output2.length);
|
if (utf8Key.bytesEquals(completion.output.output2)) {
|
||||||
UnicodeUtil.UTF8toUTF16(completion.output.output2, spare);
|
spare.grow(completion.output.output2.length);
|
||||||
if (CHARSEQUENCE_COMPARATOR.compare(spare, key) == 0) {
|
UnicodeUtil.UTF8toUTF16(completion.output.output2, spare);
|
||||||
results.add(new LookupResult(spare.toString(), decodeWeight(completion.output.output1)));
|
results.add(new LookupResult(spare.toString(), decodeWeight(completion.output.output1)));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -574,16 +574,12 @@ public class AnalyzingSuggester extends Lookup {
|
||||||
// In exactFirst mode, don't accept any paths
|
// In exactFirst mode, don't accept any paths
|
||||||
// matching the surface form since that will
|
// matching the surface form since that will
|
||||||
// create duplicate results:
|
// create duplicate results:
|
||||||
spare.grow(output.output2.length);
|
return !utf8Key.bytesEquals(output.output2);
|
||||||
UnicodeUtil.UTF8toUTF16(output.output2, spare);
|
|
||||||
return CHARSEQUENCE_COMPARATOR.compare(spare, key) != 0;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
final List<FSTUtil.Path<Pair<Long,BytesRef>>> prefixPaths = intersector.intersectAll();
|
final List<FSTUtil.Path<Pair<Long,BytesRef>>> prefixPaths = intersector.intersectAll();
|
||||||
// System.out.println(key);
|
|
||||||
for (FSTUtil.Path<Pair<Long,BytesRef>> path : prefixPaths) {
|
for (FSTUtil.Path<Pair<Long,BytesRef>> path : prefixPaths) {
|
||||||
// System.out.println(UnicodeUtil.newString(path.input.ints, path.input.offset, path.input.length));
|
|
||||||
searcher.addStartPaths(path.fstNode, path.output, true, path.input);
|
searcher.addStartPaths(path.fstNode, path.output, true, path.input);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue