From ac6ee32b605558dda19f0b70aa29c8599dcf6aea Mon Sep 17 00:00:00 2001 From: Michael McCandless Date: Sun, 14 Sep 2008 10:32:59 +0000 Subject: [PATCH] LUCENE-1383: workaround the 'leak' in Java's ThreadLocal to prevent Lucene from causing OutOfMemoryError in certain situations, eg J2EE applications git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@695184 13f79535-47bb-0310-9956-ffa450edef68 --- CHANGES.txt | 5 ++ .../org/apache/lucene/index/FieldsReader.java | 9 +- .../apache/lucene/index/SegmentReader.java | 7 +- .../apache/lucene/index/TermInfosReader.java | 5 +- .../lucene/util/CloseableThreadLocal.java | 88 +++++++++++++++++++ 5 files changed, 104 insertions(+), 10 deletions(-) create mode 100644 src/java/org/apache/lucene/util/CloseableThreadLocal.java diff --git a/CHANGES.txt b/CHANGES.txt index ab537c5c07e..e2922e71738 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -207,6 +207,11 @@ Bug fixes 15. LUCENE-1351: ISOLatin1AccentFilter now cleans additional ligatures (Cedrik Lime via Grant Ingersoll) +16. LUCENE-1383: Workaround a nasty "leak" in Java's builtin + ThreadLocal, to prevent Lucene from causing unexpected + OutOfMemoryError in certain situations (notably J2EE + applications). (Chris Lu via Mike McCandless) + New features 1. LUCENE-1137: Added Token.set/getFlags() accessors for passing more information about a Token through the analysis diff --git a/src/java/org/apache/lucene/index/FieldsReader.java b/src/java/org/apache/lucene/index/FieldsReader.java index 50665d19f0b..a45a84f598a 100644 --- a/src/java/org/apache/lucene/index/FieldsReader.java +++ b/src/java/org/apache/lucene/index/FieldsReader.java @@ -23,6 +23,7 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.AlreadyClosedException; import org.apache.lucene.store.BufferedIndexInput; +import org.apache.lucene.util.CloseableThreadLocal; import java.io.ByteArrayOutputStream; import java.io.IOException; @@ -58,7 +59,7 @@ final class FieldsReader { // file. This will be 0 if we have our own private file. private int docStoreOffset; - private ThreadLocal fieldsStreamTL = new ThreadLocal(); + private CloseableThreadLocal fieldsStreamTL = new CloseableThreadLocal(); FieldsReader(Directory d, String segment, FieldInfos fn) throws IOException { this(d, segment, fn, BufferedIndexInput.BUFFER_SIZE, -1, 0); @@ -155,11 +156,7 @@ final class FieldsReader { if (indexStream != null) { indexStream.close(); } - IndexInput localFieldsStream = (IndexInput) fieldsStreamTL.get(); - if (localFieldsStream != null) { - localFieldsStream.close(); - fieldsStreamTL.set(null); - } + fieldsStreamTL.close(); closed = true; } } diff --git a/src/java/org/apache/lucene/index/SegmentReader.java b/src/java/org/apache/lucene/index/SegmentReader.java index 26c57366fc5..a8a387c844a 100644 --- a/src/java/org/apache/lucene/index/SegmentReader.java +++ b/src/java/org/apache/lucene/index/SegmentReader.java @@ -36,6 +36,7 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexOutput; import org.apache.lucene.util.BitVector; +import org.apache.lucene.util.CloseableThreadLocal; /** * @version $Id$ @@ -50,7 +51,7 @@ class SegmentReader extends DirectoryIndexReader { TermInfosReader tis; TermVectorsReader termVectorsReaderOrig = null; - ThreadLocal termVectorsLocal = new ThreadLocal(); + CloseableThreadLocal termVectorsLocal = new CloseableThreadLocal(); BitVector deletedDocs = null; private boolean deletedDocsDirty = false; @@ -616,7 +617,9 @@ class SegmentReader extends DirectoryIndexReader { protected void doClose() throws IOException { boolean hasReferencedReader = (referencedSegmentReader != null); - + + termVectorsLocal.close(); + if (hasReferencedReader) { referencedSegmentReader.decRefReaderNotNorms(); referencedSegmentReader = null; diff --git a/src/java/org/apache/lucene/index/TermInfosReader.java b/src/java/org/apache/lucene/index/TermInfosReader.java index d71c014987d..e2781d5049d 100644 --- a/src/java/org/apache/lucene/index/TermInfosReader.java +++ b/src/java/org/apache/lucene/index/TermInfosReader.java @@ -23,6 +23,7 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.store.BufferedIndexInput; import org.apache.lucene.util.cache.Cache; import org.apache.lucene.util.cache.SimpleLRUCache; +import org.apache.lucene.util.CloseableThreadLocal; /** This stores a monotonically increasing set of pairs in a * Directory. Pairs are accessed either by Term or by ordinal position the @@ -33,7 +34,7 @@ final class TermInfosReader { private String segment; private FieldInfos fieldInfos; - private ThreadLocal threadResources = new ThreadLocal(); + private CloseableThreadLocal threadResources = new CloseableThreadLocal(); private SegmentTermEnum origEnum; private long size; @@ -143,7 +144,7 @@ final class TermInfosReader { origEnum.close(); if (indexEnum != null) indexEnum.close(); - threadResources.set(null); + threadResources.close(); } /** Returns the number of term/value pairs in the set. */ diff --git a/src/java/org/apache/lucene/util/CloseableThreadLocal.java b/src/java/org/apache/lucene/util/CloseableThreadLocal.java new file mode 100644 index 00000000000..23fe527b327 --- /dev/null +++ b/src/java/org/apache/lucene/util/CloseableThreadLocal.java @@ -0,0 +1,88 @@ +package org.apache.lucene.util; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.util.Map; +import java.util.HashMap; +import java.util.Iterator; +import java.lang.ref.WeakReference; + +/** Java's builtin ThreadLocal has a serious flaw: + * it can take an arbitrarily long amount of time to + * dereference the things you had stored in it, even once the + * ThreadLocal instance itself is no longer referenced. + * This is because there is single, master map stored for + * each thread, which all ThreadLocals share, and that + * master map only periodically purges "stale" entries. + * + * While not technically a memory leak, because eventually + * the memory will be reclaimed, it can take a long time + * and you can easily hit OutOfMemoryError because from the + * GC's standpoint the stale entries are not reclaimaible. + * + * This class works around that, by only enrolling + * WeakReference values into the ThreadLocal, and + * separately holding a hard reference to each stored + * value. When you call {@link #close}, these hard + * references are cleared and then GC is freely able to + * reclaim space by objects stored in it. */ + +public final class CloseableThreadLocal { + + private ThreadLocal t = new ThreadLocal(); + + private Map hardRefs = new HashMap(); + + public Object get() { + WeakReference weakRef = (WeakReference) t.get(); + if (weakRef == null) + return null; + else { + Object v = weakRef.get(); + // This can never be null, because we hold a hard + // reference to the underlying object: + assert v != null; + return v; + } + } + + public void set(Object object) { + + t.set(new WeakReference(object)); + + synchronized(hardRefs) { + hardRefs.put(Thread.currentThread(), object); + + // Purge dead threads + Iterator it = hardRefs.keySet().iterator(); + while(it.hasNext()) { + Thread t = (Thread) it.next(); + if (!t.isAlive()) + it.remove(); + } + } + } + + public void close() { + // Clear the hard refs; then, the only remaining refs to + // all values we were storing are weak (unless somewhere + // else is still using them) and so GC may reclaim them: + hardRefs = null; + t = null; + } +}