BlockTreeTermsWriter should compute prefix lengths using Arrays#mismatch. (#1074)

This commit is contained in:
Adrien Grand 2020-01-06 09:02:51 +01:00 committed by GitHub
parent dcc01fdaa6
commit dd74869347
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 7 additions and 7 deletions

View File

@ -19,6 +19,7 @@ package org.apache.lucene.codecs.blocktree;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.lucene.codecs.BlockTermState;
@ -882,18 +883,17 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
/** Pushes the new term to the top of the stack, and writes new blocks. */
private void pushTerm(BytesRef text) throws IOException {
int limit = Math.min(lastTerm.length(), text.length);
// Find common prefix between last term and current term:
int pos = 0;
while (pos < limit && lastTerm.byteAt(pos) == text.bytes[text.offset+pos]) {
pos++;
int prefixLength = Arrays.mismatch(lastTerm.bytes(), 0, lastTerm.length(), text.bytes, text.offset, text.offset + text.length);
if (prefixLength == -1) { // Only happens for the first term, if it is empty
assert lastTerm.length() == 0;
prefixLength = 0;
}
// if (DEBUG) System.out.println(" shared=" + prefixLength + " lastTerm.length=" + lastTerm.length());
// Close the "abandoned" suffix now:
for(int i=lastTerm.length()-1;i>=pos;i--) {
for(int i=lastTerm.length()-1;i>=prefixLength;i--) {
// How many items on top of the stack share the current suffix
// we are closing:
@ -910,7 +910,7 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
}
// Init new tail:
for(int i=pos;i<text.length;i++) {
for(int i=prefixLength;i<text.length;i++) {
prefixStarts[i] = pending.size();
}