Resolve the role query and the number of docs lazily (#48036)

This commit ensures that the creation of a DocumentSubsetReader does not
eagerly resolve the role query and the number of docs that match.
We want to delay this expensive operation in order to ensure that we really
need this information when we build it. For this reason the role query and the
number of docs are now resolved on demand. This commit also depends on
https://issues.apache.org/jira/browse/LUCENE-9003 that will also compute the global
number of docs lazily.
This commit is contained in:
Jim Ferenczi 2019-10-25 17:00:24 +02:00 committed by jimczi
parent 2861088a59
commit 7fc413c22c
1 changed files with 34 additions and 9 deletions

View File

@ -17,6 +17,7 @@ import org.apache.lucene.util.BitSet;
import org.apache.lucene.util.BitSetIterator;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.CombinedBitSet;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.ExceptionsHelper;
import org.elasticsearch.common.cache.Cache;
import org.elasticsearch.common.cache.CacheBuilder;
@ -51,7 +52,7 @@ public final class DocumentSubsetReader extends FilterLeafReader {
/**
* Compute the number of live documents. This method is SLOW.
*/
private static int computeNumDocs(LeafReader reader, Query roleQuery, BitSet roleQueryBits) {
private static int computeNumDocs(LeafReader reader, BitSet roleQueryBits) {
final Bits liveDocs = reader.getLiveDocs();
if (roleQueryBits == null) {
return 0;
@ -103,7 +104,7 @@ public final class DocumentSubsetReader extends FilterLeafReader {
throw e;
}
}
return perReaderCache.computeIfAbsent(roleQuery, q -> computeNumDocs(reader, roleQuery, roleQueryBits));
return perReaderCache.computeIfAbsent(roleQuery, q -> computeNumDocs(reader, roleQueryBits));
}
public static final class DocumentSubsetDirectoryReader extends FilterDirectoryReader {
@ -152,21 +153,44 @@ public final class DocumentSubsetReader extends FilterLeafReader {
}
}
private final BitSet roleQueryBits;
private final int numDocs;
private final DocumentSubsetBitsetCache bitsetCache;
private final Query roleQuery;
// we don't use a volatile here because the bitset is resolved before numDocs in the synchronized block
// so any thread that see numDocs != -1 should also see the true value of the roleQueryBits (happens-before).
private BitSet roleQueryBits;
private volatile int numDocs = -1;
private DocumentSubsetReader(final LeafReader in, DocumentSubsetBitsetCache bitsetCache, final Query roleQuery) throws Exception {
private DocumentSubsetReader(final LeafReader in, DocumentSubsetBitsetCache bitsetCache, final Query roleQuery) {
super(in);
this.roleQueryBits = bitsetCache.getBitSet(roleQuery, in.getContext());
this.numDocs = getNumDocs(in, roleQuery, roleQueryBits);
this.bitsetCache = bitsetCache;
this.roleQuery = roleQuery;
}
/**
* Resolve the role query and the number of docs lazily
*/
private void computeNumDocsIfNeeded() {
if (numDocs == -1) {
synchronized (this) {
if (numDocs == -1) {
try {
roleQueryBits = bitsetCache.getBitSet(roleQuery, in.getContext());
numDocs = getNumDocs(in, roleQuery, roleQueryBits);
} catch (Exception e) {
throw new ElasticsearchException("Failed to load role query", e);
}
}
}
}
}
@Override
public Bits getLiveDocs() {
computeNumDocsIfNeeded();
final Bits actualLiveDocs = in.getLiveDocs();
if (roleQueryBits == null) {
// If we would a <code>null</code> liveDocs then that would mean that no docs are marked as deleted,
// but that isn't the case. No docs match with the role query and therefor all docs are marked as deleted
// If we would return a <code>null</code> liveDocs then that would mean that no docs are marked as deleted,
// but that isn't the case. No docs match with the role query and therefore all docs are marked as deleted
return new Bits.MatchNoBits(in.maxDoc());
} else if (actualLiveDocs == null) {
return roleQueryBits;
@ -178,6 +202,7 @@ public final class DocumentSubsetReader extends FilterLeafReader {
@Override
public int numDocs() {
computeNumDocsIfNeeded();
return numDocs;
}