LUCENE-9455: ExitableTermsEnum should sample timeout and interruption check before calling next()

Closes #1998
This commit is contained in:
Zach Chen 2020-10-24 21:46:24 +02:00 committed by Bruno Roustant
parent 2d3a221dcf
commit 98594b54fd
No known key found for this signature in database
GPG Key ID: CD28DABB95360525
3 changed files with 82 additions and 22 deletions

View File

@ -156,6 +156,9 @@ Improvements
* LUCENE-9531: Consolidated CharStream and FastCharStream classes: these have been moved
from each query parser package to org.apache.lucene.queryparser.charstream (Dawid Weiss).
* LUCENE-9455: ExitableTermsEnum should sample timeout and interruption
check before calling next(). (Zach Chen via Bruno Roustant)
Bug fixes
* LUCENE-8663: NRTCachingDirectory.slowFileExists may open a file while

View File

@ -496,35 +496,39 @@ public class ExitableDirectoryReader extends FilterDirectoryReader {
* exitable enumeration of terms.
*/
public static class ExitableTermsEnum extends FilterTermsEnum {
// Create bit mask in the form of 0000 1111 for efficient checking
private static final int NUM_CALLS_PER_TIMEOUT_CHECK = (1 << 4) - 1; // 15
private int calls;
private QueryTimeout queryTimeout;
/** Constructor **/
public ExitableTermsEnum(TermsEnum termsEnum, QueryTimeout queryTimeout) {
super(termsEnum);
this.queryTimeout = queryTimeout;
checkAndThrow();
checkTimeoutWithSampling();
}
/**
* Throws {@link ExitingReaderException} if {@link QueryTimeout#shouldExit()} returns true,
* or if {@link Thread#interrupted()} returns true.
*/
private void checkAndThrow() {
if (queryTimeout.shouldExit()) {
throw new ExitingReaderException("The request took too long to iterate over terms. Timeout: "
+ queryTimeout.toString()
+ ", TermsEnum=" + in
);
} else if (Thread.interrupted()) {
throw new ExitingReaderException("Interrupted while iterating over terms. TermsEnum=" + in);
private void checkTimeoutWithSampling() {
if ((calls++ & NUM_CALLS_PER_TIMEOUT_CHECK) == 0) {
if (queryTimeout.shouldExit()) {
throw new ExitingReaderException("The request took too long to iterate over terms. Timeout: "
+ queryTimeout.toString()
+ ", TermsEnum=" + in
);
} else if (Thread.interrupted()) {
throw new ExitingReaderException("Interrupted while iterating over terms. TermsEnum=" + in);
}
}
}
@Override
public BytesRef next() throws IOException {
// Before every iteration, check if the iteration should exit
checkAndThrow();
checkTimeoutWithSampling();
return in.next();
}
}

View File

@ -16,18 +16,8 @@
*/
package org.apache.lucene.index;
import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.SortedNumericDocValuesField;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.document.*;
import org.apache.lucene.index.ExitableDirectoryReader.ExitingReaderException;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
@ -37,6 +27,9 @@ import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import java.io.IOException;
import java.util.Arrays;
/**
* Test that uses a default/lucene Implementation of {@link QueryTimeout}
* to exit out long running queries that take too long to iterate over Terms.
@ -166,6 +159,47 @@ public class TestExitableDirectoryReader extends LuceneTestCase {
directory.close();
}
/**
* Tests time out check sampling of TermsEnum iterations
*
* @throws Exception on error
*/
public void testExitableTermsEnumSampleTimeoutCheck() throws Exception {
try (Directory directory = newDirectory()) {
try (IndexWriter writer = new IndexWriter(directory, newIndexWriterConfig(new MockAnalyzer(random())))) {
for (int i = 0; i < 50; i++) {
Document d1 = new Document();
d1.add(newTextField("default", "term" + i, Field.Store.YES));
writer.addDocument(d1);
}
writer.forceMerge(1);
writer.commit();
DirectoryReader directoryReader;
DirectoryReader exitableDirectoryReader;
IndexReader reader;
IndexSearcher searcher;
Query query = new PrefixQuery(new Term("default", "term"));
// Set a fairly high timeout value (infinite) and expect the query to complete in that time frame.
// Not checking the validity of the result, but checking the sampling kicks in to reduce the number of timeout check
CountingQueryTimeout queryTimeout = new CountingQueryTimeout();
directoryReader = DirectoryReader.open(directory);
exitableDirectoryReader = new ExitableDirectoryReader(directoryReader, queryTimeout);
reader = new TestReader(getOnlyLeafReader(exitableDirectoryReader));
searcher = new IndexSearcher(reader);
searcher.search(query, 300);
reader.close();
// The number of sampled query time out check here depends on two factors:
// 1. ExitableDirectoryReader.ExitableTermsEnum.NUM_CALLS_PER_TIMEOUT_CHECK
// 2. MultiTermQueryConstantScoreWrapper.BOOLEAN_REWRITE_TERM_COUNT_THRESHOLD
assertEquals(5, queryTimeout.getShouldExitCallCount());
}
}
}
/**
* Tests timing out of PointValues queries
*
@ -268,6 +302,25 @@ public class TestExitableDirectoryReader extends LuceneTestCase {
};
}
private static class CountingQueryTimeout implements QueryTimeout {
private int counter = 0;
@Override
public boolean shouldExit() {
counter++;
return false;
}
@Override
public boolean isTimeoutEnabled() {
return true;
}
public int getShouldExitCallCount() {
return counter;
}
}
private static QueryTimeout immediateQueryTimeout() {
return new QueryTimeout() {