diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index deb707836ba..a68d7e3d762 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -118,6 +118,13 @@ Build * LUCENE-7653: Update randomizedtesting to version 2.5.0. (Dawid Weiss) +======================= Lucene 6.4.1 ======================= + +Bug Fixes + +* LUCENE-7657: Fixed potential memory leak in the case that a (Span)TermQuery + with a TermContext is cached. (Adrien Grand) + ======================= Lucene 6.4.0 ======================= API Changes diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexReaderContext.java b/lucene/core/src/java/org/apache/lucene/index/IndexReaderContext.java index 247fa57bc10..dada3ff7263 100644 --- a/lucene/core/src/java/org/apache/lucene/index/IndexReaderContext.java +++ b/lucene/core/src/java/org/apache/lucene/index/IndexReaderContext.java @@ -32,7 +32,12 @@ public abstract class IndexReaderContext { public final int docBaseInParent; /** the ord for this reader in the parent, 0 if parent is null */ public final int ordInParent; - + + // An object that uniquely identifies this context without referencing + // segments. The goal is to make it fine to have references to this + // identity object, even after the index reader has been closed + final Object identity = new Object(); + IndexReaderContext(CompositeReaderContext parent, int ordInParent, int docBaseInParent) { if (!(this instanceof CompositeReaderContext || this instanceof LeafReaderContext)) throw new Error("This class should never be extended by custom code!"); diff --git a/lucene/core/src/java/org/apache/lucene/index/TermContext.java b/lucene/core/src/java/org/apache/lucene/index/TermContext.java index e55aeba7c75..ed25564e461 100644 --- a/lucene/core/src/java/org/apache/lucene/index/TermContext.java +++ b/lucene/core/src/java/org/apache/lucene/index/TermContext.java @@ -33,12 +33,8 @@ import java.util.Arrays; */ public final class TermContext { - /** Holds the {@link IndexReaderContext} of the top-level - * {@link IndexReader}, used internally only for - * asserting. - * - * @lucene.internal */ - public final IndexReaderContext topReaderContext; + // Important: do NOT keep hard references to index readers + private final Object topReaderContextIdentity; private final TermState[] states; private int docFreq; private long totalTermFreq; @@ -50,7 +46,7 @@ public final class TermContext { */ public TermContext(IndexReaderContext context) { assert context != null && context.isTopLevel; - topReaderContext = context; + topReaderContextIdentity = context.identity; docFreq = 0; totalTermFreq = 0; final int len; @@ -61,7 +57,16 @@ public final class TermContext { } states = new TermState[len]; } - + + /** + * Expert: Return whether this {@link TermContext} was built for the given + * {@link IndexReaderContext}. This is typically used for assertions. + * @lucene.internal + */ + public boolean wasBuiltFor(IndexReaderContext context) { + return topReaderContextIdentity == context.identity; + } + /** * Creates a {@link TermContext} with an initial {@link TermState}, * {@link IndexReader} pair. diff --git a/lucene/core/src/java/org/apache/lucene/search/BlendedTermQuery.java b/lucene/core/src/java/org/apache/lucene/search/BlendedTermQuery.java index 85b8b0a4f39..3a0cdc5a1ef 100644 --- a/lucene/core/src/java/org/apache/lucene/search/BlendedTermQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/BlendedTermQuery.java @@ -22,6 +22,7 @@ import java.util.Arrays; import java.util.List; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexReaderContext; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.Term; import org.apache.lucene.index.TermContext; @@ -264,7 +265,7 @@ public final class BlendedTermQuery extends Query { public final Query rewrite(IndexReader reader) throws IOException { final TermContext[] contexts = Arrays.copyOf(this.contexts, this.contexts.length); for (int i = 0; i < contexts.length; ++i) { - if (contexts[i] == null || contexts[i].topReaderContext != reader.getContext()) { + if (contexts[i] == null || contexts[i].wasBuiltFor(reader.getContext()) == false) { contexts[i] = TermContext.build(reader.getContext(), terms[i]); } } @@ -284,7 +285,7 @@ public final class BlendedTermQuery extends Query { } for (int i = 0; i < contexts.length; ++i) { - contexts[i] = adjustFrequencies(contexts[i], df, ttf); + contexts[i] = adjustFrequencies(reader.getContext(), contexts[i], df, ttf); } Query[] termQueries = new Query[terms.length]; @@ -297,15 +298,16 @@ public final class BlendedTermQuery extends Query { return rewriteMethod.rewrite(termQueries); } - private static TermContext adjustFrequencies(TermContext ctx, int artificialDf, long artificialTtf) { - List leaves = ctx.topReaderContext.leaves(); + private static TermContext adjustFrequencies(IndexReaderContext readerContext, + TermContext ctx, int artificialDf, long artificialTtf) { + List leaves = readerContext.leaves(); final int len; if (leaves == null) { len = 1; } else { len = leaves.size(); } - TermContext newCtx = new TermContext(ctx.topReaderContext); + TermContext newCtx = new TermContext(readerContext); for (int i = 0; i < len; ++i) { TermState termState = ctx.get(i); if (termState == null) { diff --git a/lucene/core/src/java/org/apache/lucene/search/TermQuery.java b/lucene/core/src/java/org/apache/lucene/search/TermQuery.java index 73170b91560..e3e299f6bb5 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TermQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/TermQuery.java @@ -86,7 +86,7 @@ public class TermQuery extends Query { @Override public Scorer scorer(LeafReaderContext context) throws IOException { - assert termStates == null || termStates.topReaderContext == ReaderUtil.getTopLevelContext(context) : "The top-reader used to create Weight (" + termStates.topReaderContext + ") is not the same as the current reader's top-reader (" + ReaderUtil.getTopLevelContext(context);; + assert termStates == null || termStates.wasBuiltFor(ReaderUtil.getTopLevelContext(context)) : "The top-reader used to create Weight is not the same as the current reader's top-reader (" + ReaderUtil.getTopLevelContext(context);; final TermsEnum termsEnum = getTermsEnum(context); if (termsEnum == null) { return null; @@ -103,7 +103,7 @@ public class TermQuery extends Query { private TermsEnum getTermsEnum(LeafReaderContext context) throws IOException { if (termStates != null) { // TermQuery either used as a Query or the term states have been provided at construction time - assert termStates.topReaderContext == ReaderUtil.getTopLevelContext(context) : "The top-reader used to create Weight (" + termStates.topReaderContext + ") is not the same as the current reader's top-reader (" + ReaderUtil.getTopLevelContext(context); + assert termStates.wasBuiltFor(ReaderUtil.getTopLevelContext(context)) : "The top-reader used to create Weight is not the same as the current reader's top-reader (" + ReaderUtil.getTopLevelContext(context); final TermState state = termStates.get(context.ord); if (state == null) { // term is not present in that reader assert termNotInReader(context.reader(), term) : "no termstate found but term exists in reader term=" + term; @@ -181,7 +181,7 @@ public class TermQuery extends Query { final IndexReaderContext context = searcher.getTopReaderContext(); final TermContext termState; if (perReaderTermState == null - || perReaderTermState.topReaderContext != context) { + || perReaderTermState.wasBuiltFor(context) == false) { if (needsScores) { // make TermQuery single-pass if we don't have a PRTS or if the context // differs! diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanTermQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanTermQuery.java index 2746a0c1545..3e13be7ecb1 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanTermQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanTermQuery.java @@ -67,7 +67,7 @@ public class SpanTermQuery extends SpanQuery { public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException { final TermContext context; final IndexReaderContext topContext = searcher.getTopReaderContext(); - if (termContext == null || termContext.topReaderContext != topContext) { + if (termContext == null || termContext.wasBuiltFor(topContext) == false) { context = TermContext.build(topContext, term); } else { @@ -99,7 +99,7 @@ public class SpanTermQuery extends SpanQuery { @Override public Spans getSpans(final LeafReaderContext context, Postings requiredPostings) throws IOException { - assert termContext.topReaderContext == ReaderUtil.getTopLevelContext(context) : "The top-reader used to create Weight (" + termContext.topReaderContext + ") is not the same as the current reader's top-reader (" + ReaderUtil.getTopLevelContext(context); + assert termContext.wasBuiltFor(ReaderUtil.getTopLevelContext(context)) : "The top-reader used to create Weight is not the same as the current reader's top-reader (" + ReaderUtil.getTopLevelContext(context); final TermState state = termContext.get(context.ord); if (state == null) { // term is not present in that reader diff --git a/lucene/sandbox/src/java/org/apache/lucene/search/TermAutomatonQuery.java b/lucene/sandbox/src/java/org/apache/lucene/search/TermAutomatonQuery.java index fbf3dc3ca0e..04c8736fda1 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/search/TermAutomatonQuery.java +++ b/lucene/sandbox/src/java/org/apache/lucene/search/TermAutomatonQuery.java @@ -378,7 +378,7 @@ public class TermAutomatonQuery extends Query { boolean any = false; for(Map.Entry ent : termStates.entrySet()) { TermContext termContext = ent.getValue(); - assert termContext.topReaderContext == ReaderUtil.getTopLevelContext(context) : "The top-reader used to create Weight (" + termContext.topReaderContext + ") is not the same as the current reader's top-reader (" + ReaderUtil.getTopLevelContext(context); + assert termContext.wasBuiltFor(ReaderUtil.getTopLevelContext(context)) : "The top-reader used to create Weight is not the same as the current reader's top-reader (" + ReaderUtil.getTopLevelContext(context); BytesRef term = idToTerm.get(ent.getKey()); TermState state = termContext.get(context.ord); if (state != null) {