mirror of https://github.com/apache/lucene.git
LUCENE-10292: Suggest: Fix FreeTextSuggester so that getCount() returned results consistent with lookup() during concurrent build()
Fix SuggestRebuildTestUtil to reliably surfice this kind of failure that was previously sporadic
This commit is contained in:
parent
8d9a333fac
commit
a8d86ea6e8
|
@ -140,7 +140,7 @@ public class FreeTextSuggester extends Lookup {
|
||||||
private final byte separator;
|
private final byte separator;
|
||||||
|
|
||||||
/** Number of entries the lookup was built with */
|
/** Number of entries the lookup was built with */
|
||||||
private long count = 0;
|
private volatile long count = 0;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The default character used to join multiple tokens into a single ngram token. The input tokens
|
* The default character used to join multiple tokens into a single ngram token. The input tokens
|
||||||
|
@ -273,7 +273,7 @@ public class FreeTextSuggester extends Lookup {
|
||||||
IndexReader reader = null;
|
IndexReader reader = null;
|
||||||
|
|
||||||
boolean success = false;
|
boolean success = false;
|
||||||
count = 0;
|
long newCount = 0;
|
||||||
try {
|
try {
|
||||||
while (true) {
|
while (true) {
|
||||||
BytesRef surfaceForm = iterator.next();
|
BytesRef surfaceForm = iterator.next();
|
||||||
|
@ -282,7 +282,7 @@ public class FreeTextSuggester extends Lookup {
|
||||||
}
|
}
|
||||||
field.setStringValue(surfaceForm.utf8ToString());
|
field.setStringValue(surfaceForm.utf8ToString());
|
||||||
writer.addDocument(doc);
|
writer.addDocument(doc);
|
||||||
count++;
|
newCount++;
|
||||||
}
|
}
|
||||||
reader = DirectoryReader.open(writer);
|
reader = DirectoryReader.open(writer);
|
||||||
|
|
||||||
|
@ -320,10 +320,13 @@ public class FreeTextSuggester extends Lookup {
|
||||||
fstCompiler.add(Util.toIntsRef(term, scratchInts), encodeWeight(termsEnum.totalTermFreq()));
|
fstCompiler.add(Util.toIntsRef(term, scratchInts), encodeWeight(termsEnum.totalTermFreq()));
|
||||||
}
|
}
|
||||||
|
|
||||||
fst = fstCompiler.compile();
|
final FST<Long> newFst = fstCompiler.compile();
|
||||||
if (fst == null) {
|
if (newFst == null) {
|
||||||
throw new IllegalArgumentException("need at least one suggestion");
|
throw new IllegalArgumentException("need at least one suggestion");
|
||||||
}
|
}
|
||||||
|
fst = newFst;
|
||||||
|
count = newCount;
|
||||||
|
|
||||||
// System.out.println("FST: " + fst.getNodeCount() + " nodes");
|
// System.out.println("FST: " + fst.getNodeCount() + " nodes");
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
|
@ -18,11 +18,13 @@ package org.apache.lucene.search.suggest;
|
||||||
|
|
||||||
import static org.junit.Assert.assertNull;
|
import static org.junit.Assert.assertNull;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Iterator;
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.Set;
|
||||||
import java.util.concurrent.Semaphore;
|
import java.util.concurrent.Semaphore;
|
||||||
import java.util.concurrent.atomic.AtomicReference;
|
import java.util.concurrent.atomic.AtomicReference;
|
||||||
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
|
||||||
/** Reusable Logic for confirming that Lookup impls can return suggestions during a 'rebuild' */
|
/** Reusable Logic for confirming that Lookup impls can return suggestions during a 'rebuild' */
|
||||||
public final class SuggestRebuildTestUtil {
|
public final class SuggestRebuildTestUtil {
|
||||||
|
@ -57,23 +59,27 @@ public final class SuggestRebuildTestUtil {
|
||||||
// modify source data we're going to build from, and spin up background thread that
|
// modify source data we're going to build from, and spin up background thread that
|
||||||
// will rebuild (slowly)
|
// will rebuild (slowly)
|
||||||
data.addAll(extraData);
|
data.addAll(extraData);
|
||||||
final Semaphore rebuildGate = new Semaphore(0);
|
final Semaphore readyToCheck = new Semaphore(0);
|
||||||
|
final Semaphore readyToAdvance = new Semaphore(0);
|
||||||
final AtomicReference<Throwable> buildError = new AtomicReference<>();
|
final AtomicReference<Throwable> buildError = new AtomicReference<>();
|
||||||
final Thread rebuilder =
|
final Thread rebuilder =
|
||||||
new Thread(
|
new Thread(
|
||||||
() -> {
|
() -> {
|
||||||
try {
|
try {
|
||||||
suggester.build(
|
suggester.build(
|
||||||
new InputArrayIterator(new DelayedIterator<>(rebuildGate, data.iterator())));
|
new DelayedInputIterator(
|
||||||
|
readyToCheck, readyToAdvance, new InputArrayIterator(data.iterator())));
|
||||||
} catch (Throwable t) {
|
} catch (Throwable t) {
|
||||||
buildError.set(t);
|
buildError.set(t);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
rebuilder.start();
|
rebuilder.start();
|
||||||
// at every stage of the slow rebuild, we should still be able to get our original suggestions
|
// at every stage of the slow rebuild, we should still be able to get our original suggestions
|
||||||
for (int i = 0; i < data.size(); i++) {
|
// (+1 iteration to ensure final next() call can return null)
|
||||||
|
for (int i = 0; i < data.size() + 1; i++) {
|
||||||
|
readyToCheck.acquire();
|
||||||
initialChecks.check(suggester);
|
initialChecks.check(suggester);
|
||||||
rebuildGate.release();
|
readyToAdvance.release();
|
||||||
}
|
}
|
||||||
// once all the data is released from the iterator, the background rebuild should finish, and
|
// once all the data is released from the iterator, the background rebuild should finish, and
|
||||||
// suggest results
|
// suggest results
|
||||||
|
@ -92,34 +98,54 @@ public final class SuggestRebuildTestUtil {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* An iterator wrapper whose {@link Iterator#next} method will only return when a Semaphore permit
|
* An InputArrayIterator wrapper whose {@link InputIterator#next} method releases on a Semaphore,
|
||||||
* is acquirable
|
* and then acquires from a differnet Semaphore.
|
||||||
*/
|
*/
|
||||||
private static final class DelayedIterator<E> implements Iterator<E> {
|
private static final class DelayedInputIterator implements InputIterator {
|
||||||
final Iterator<E> inner;
|
final Semaphore releaseOnNext;
|
||||||
final Semaphore gate;
|
final Semaphore acquireOnNext;
|
||||||
|
final InputIterator inner;
|
||||||
|
|
||||||
public DelayedIterator(final Semaphore gate, final Iterator<E> inner) {
|
public DelayedInputIterator(
|
||||||
assert null != gate;
|
final Semaphore releaseOnNext, final Semaphore acquireOnNext, final InputIterator inner) {
|
||||||
|
assert null != releaseOnNext;
|
||||||
|
assert null != acquireOnNext;
|
||||||
assert null != inner;
|
assert null != inner;
|
||||||
this.gate = gate;
|
this.releaseOnNext = releaseOnNext;
|
||||||
|
this.acquireOnNext = acquireOnNext;
|
||||||
this.inner = inner;
|
this.inner = inner;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean hasNext() {
|
public BytesRef next() throws IOException {
|
||||||
return inner.hasNext();
|
releaseOnNext.release();
|
||||||
}
|
acquireOnNext.acquireUninterruptibly();
|
||||||
|
|
||||||
@Override
|
|
||||||
public E next() {
|
|
||||||
gate.acquireUninterruptibly();
|
|
||||||
return inner.next();
|
return inner.next();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void remove() {
|
public long weight() {
|
||||||
inner.remove();
|
return inner.weight();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public BytesRef payload() {
|
||||||
|
return inner.payload();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean hasPayloads() {
|
||||||
|
return inner.hasPayloads();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Set<BytesRef> contexts() {
|
||||||
|
return inner.contexts();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean hasContexts() {
|
||||||
|
return inner.hasContexts();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue