Upgrade to Lucene 3.6, closes #1862.

2012-04-15 17:39:41 +03:00 · 2012-04-15 17:39:41 +03:00 · 16cd159a38
parent bf10ef5b4b
commit 16cd159a38
16 changed files with 63 additions and 1859 deletions
--- a/pom.xml
+++ b/pom.xml
@@ -30,7 +30,7 @@
    </parent>

    <properties>
-        <lucene.version>3.5.0</lucene.version>
+        <lucene.version>3.6.0</lucene.version>
    </properties>

    <repositories>
--- a/src/main/java/org/apache/lucene/index/memory/CustomMemoryIndex.java
+++ b/src/main/java/org/apache/lucene/index/memory/CustomMemoryIndex.java
@@ -46,10 +46,10 @@ import org.apache.lucene.search.*;
 import org.apache.lucene.store.RAMDirectory;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.Constants;
+import org.elasticsearch.common.io.FastStringReader;

 import java.io.IOException;
 import java.io.Serializable;
-import java.io.StringReader;
 import java.util.*;

 /**
@@ -201,6 +201,8 @@ public class CustomMemoryIndex implements Serializable {

    private static final boolean DEBUG = false;

+    private final FieldInfos fieldInfos;
+
    /**
     * Sorts term entries into ascending order; also works for
     * Arrays.binarySearch() and Arrays.sort()
@@ -234,6 +236,7 @@ public class CustomMemoryIndex implements Serializable {
     */
    private CustomMemoryIndex(boolean storeOffsets) {
        this.stride = storeOffsets ? 3 : 1;
+        fieldInfos = new FieldInfos();
    }

    /**
@@ -257,8 +260,12 @@ public class CustomMemoryIndex implements Serializable {
        if (analyzer == null)
            throw new IllegalArgumentException("analyzer must not be null");

-        TokenStream stream = analyzer.tokenStream(fieldName,
-                new StringReader(text));
+        TokenStream stream;
+        try {
+            stream = analyzer.reusableTokenStream(fieldName, new FastStringReader(text));
+        } catch (IOException ex) {
+            throw new RuntimeException(ex);
+        }

        addField(fieldName, stream);
    }
@@ -338,6 +345,8 @@ public class CustomMemoryIndex implements Serializable {
            int numOverlapTokens = 0;
            int pos = -1;

+            fieldInfos.add(fieldName, true, true);
+
            // CHANGE
            if (fields.get(fieldName) != null) {
                Info info = fields.get(fieldName);
@@ -760,13 +769,12 @@ public class CustomMemoryIndex implements Serializable {
     * Search support for Lucene framework integration; implements all methods
     * required by the Lucene IndexReader contracts.
     */
-    private final class MemoryIndexReader extends IndexReader {
+    final class MemoryIndexReader extends IndexReader {

        private Searcher searcher; // needed to find searcher.getSimilarity()

        private MemoryIndexReader() {
            super(); // avoid as much superclass baggage as possible
-            readerFinishedListeners = Collections.synchronizedSet(new HashSet<ReaderFinishedListener>());
        }

        private Info getInfo(String fieldName) {
@@ -1174,12 +1182,6 @@ public class CustomMemoryIndex implements Serializable {
            return 1;
        }

-        @Override
-        public Document document(int n) {
-            if (DEBUG) System.err.println("MemoryIndexReader.document");
-            return new Document(); // there are no stored fields
-        }
-
        //When we convert to JDK 1.5 make this Set<String>
        @Override
        public Document document(int n, FieldSelector fieldSelector) throws IOException {
@@ -1219,20 +1221,9 @@ public class CustomMemoryIndex implements Serializable {
            if (DEBUG) System.err.println("MemoryIndexReader.doClose");
        }

-        // lucene >= 1.9 (remove this method for lucene-1.4.3)
        @Override
-        public Collection<String> getFieldNames(FieldOption fieldOption) {
-            if (DEBUG) System.err.println("MemoryIndexReader.getFieldNamesOption");
-            if (fieldOption == FieldOption.UNINDEXED)
-                return Collections.<String>emptySet();
-            if (fieldOption == FieldOption.INDEXED_NO_TERMVECTOR)
-                return Collections.<String>emptySet();
-            if (fieldOption == FieldOption.TERMVECTOR_WITH_OFFSET && stride == 1)
-                return Collections.<String>emptySet();
-            if (fieldOption == FieldOption.TERMVECTOR_WITH_POSITION_OFFSET && stride == 1)
-                return Collections.<String>emptySet();
-
-            return Collections.unmodifiableSet(fields.keySet());
+        public FieldInfos getFieldInfos() {
+            return fieldInfos;
        }
    }

--- a/src/main/java/org/elasticsearch/common/lucene/Lucene.java
+++ b/src/main/java/org/elasticsearch/common/lucene/Lucene.java
@@ -40,7 +40,7 @@ import java.lang.reflect.Field;
 */
 public class Lucene {

-    public static final Version VERSION = Version.LUCENE_35;
+    public static final Version VERSION = Version.LUCENE_36;
    public static final Version ANALYZER_VERSION = VERSION;
    public static final Version QUERYPARSER_VERSION = VERSION;

@@ -55,6 +55,9 @@ public class Lucene {
        if (version == null) {
            return defaultVersion;
        }
+        if ("3.6".equals(version)) {
+            return Version.LUCENE_36;
+        }
        if ("3.5".equals(version)) {
            return Version.LUCENE_35;
        }
--- a/src/main/java/org/elasticsearch/common/lucene/analysis/HTMLStripCharFilter.java
+++ b/src/main/java/org/elasticsearch/common/lucene/analysis/HTMLStripCharFilter.java
--- a/src/main/java/org/elasticsearch/common/lucene/manager/ReferenceManager.java
+++ b/src/main/java/org/elasticsearch/common/lucene/manager/ReferenceManager.java
@@ -1,181 +0,0 @@
-package org.elasticsearch.common.lucene.manager;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import org.apache.lucene.store.AlreadyClosedException;
-
-import java.io.Closeable;
-import java.io.IOException;
-import java.util.concurrent.Semaphore;
-
-/**
- * Utility class to safely share instances of a certain type across multiple
- * threads, while periodically refreshing them. This class ensures each
- * reference is closed only once all threads have finished using it. It is
- * recommended to consult the documentation of {@link ReferenceManager}
- * implementations for their {@link #maybeRefresh()} semantics.
- *
- * @param <G> the concrete type that will be {@link #acquire() acquired} and
- *            {@link #release(Object) released}.
- * @lucene.experimental
- */
-// LUCENE MONITOR: 3.6 Remove this once 3.6 is out and use it
-public abstract class ReferenceManager<G> implements Closeable {
-
-    private static final String REFERENCE_MANAGER_IS_CLOSED_MSG = "this ReferenceManager is closed";
-
-    protected volatile G current;
-
-    private final Semaphore reopenLock = new Semaphore(1);
-
-    private void ensureOpen() {
-        if (current == null) {
-            throw new AlreadyClosedException(REFERENCE_MANAGER_IS_CLOSED_MSG);
-        }
-    }
-
-    private synchronized void swapReference(G newReference) throws IOException {
-        ensureOpen();
-        final G oldReference = current;
-        current = newReference;
-        release(oldReference);
-    }
-
-    /**
-     * Decrement reference counting on the given reference.
-     */
-    protected abstract void decRef(G reference) throws IOException;
-
-    /**
-     * Refresh the given reference if needed. Returns {@code null} if no refresh
-     * was needed, otherwise a new refreshed reference.
-     */
-    protected abstract G refreshIfNeeded(G referenceToRefresh) throws IOException;
-
-    /**
-     * Try to increment reference counting on the given reference. Return true if
-     * the operation was successful.
-     */
-    protected abstract boolean tryIncRef(G reference);
-
-    /**
-     * Obtain the current reference. You must match every call to acquire with one
-     * call to {@link #release}; it's best to do so in a finally clause, and set
-     * the reference to {@code null} to prevent accidental usage after it has been
-     * released.
-     */
-    public final G acquire() {
-        G ref;
-        do {
-            if ((ref = current) == null) {
-                throw new AlreadyClosedException(REFERENCE_MANAGER_IS_CLOSED_MSG);
-            }
-        } while (!tryIncRef(ref));
-        return ref;
-    }
-
-    /**
-     * Close this ReferenceManager to future {@link #acquire() acquiring}. Any
-     * references that were previously {@link #acquire() acquired} won't be
-     * affected, and they should still be {@link #release released} when they are
-     * not needed anymore.
-     */
-    public final synchronized void close() throws IOException {
-        if (current != null) {
-            // make sure we can call this more than once
-            // closeable javadoc says:
-            // if this is already closed then invoking this method has no effect.
-            swapReference(null);
-            afterClose();
-        }
-    }
-
-    /**
-     * Called after close(), so subclass can free any resources.
-     */
-    protected void afterClose() throws IOException {
-    }
-
-    /**
-     * You must call this, periodically, if you want that {@link #acquire()} will
-     * return refreshed instances.
-     * <p/>
-     * <p/>
-     * <b>Threads</b>: it's fine for more than one thread to call this at once.
-     * Only the first thread will attempt the refresh; subsequent threads will see
-     * that another thread is already handling refresh and will return
-     * immediately. Note that this means if another thread is already refreshing
-     * then subsequent threads will return right away without waiting for the
-     * refresh to complete.
-     * <p/>
-     * <p/>
-     * If this method returns true it means the calling thread either refreshed
-     * or that there were no changes to refresh.  If it returns false it means another
-     * thread is currently refreshing.
-     */
-    public final boolean maybeRefresh() throws IOException {
-        ensureOpen();
-
-        // Ensure only 1 thread does reopen at once; other threads just return immediately:
-        final boolean doTryRefresh = reopenLock.tryAcquire();
-        if (doTryRefresh) {
-            try {
-                final G reference = acquire();
-                try {
-                    G newReference = refreshIfNeeded(reference);
-                    if (newReference != null) {
-                        assert newReference != reference : "refreshIfNeeded should return null if refresh wasn't needed";
-                        boolean success = false;
-                        try {
-                            swapReference(newReference);
-                            success = true;
-                        } finally {
-                            if (!success) {
-                                release(newReference);
-                            }
-                        }
-                    }
-                } finally {
-                    release(reference);
-                }
-                afterRefresh();
-            } finally {
-                reopenLock.release();
-            }
-        }
-
-        return doTryRefresh;
-    }
-
-    /**
-     * Called after swapReference has installed a new
-     * instance.
-     */
-    protected void afterRefresh() throws IOException {
-    }
-
-    /**
-     * Release the refernce previously obtained via {@link #acquire()}.
-     * <p/>
-     * <b>NOTE:</b> it's safe to call this after {@link #close()}.
-     */
-    public final void release(G reference) throws IOException {
-        assert reference != null;
-        decRef(reference);
-    }
-}
--- a/src/main/java/org/elasticsearch/common/lucene/manager/SearcherFactory.java
+++ b/src/main/java/org/elasticsearch/common/lucene/manager/SearcherFactory.java
@@ -1,56 +0,0 @@
-package org.elasticsearch.common.lucene.manager;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.search.IndexSearcher;
-
-import java.io.IOException;
-
-/**
- * Factory class used by {@link org.apache.lucene.search.SearcherManager} and {@link org.apache.lucene.search.NRTManager} to
- * create new IndexSearchers. The default implementation just creates
- * an IndexSearcher with no custom behavior:
- * <p/>
- * <pre class="prettyprint">
- * public IndexSearcher newSearcher(IndexReader r) throws IOException {
- * return new IndexSearcher(r);
- * }
- * </pre>
- * <p/>
- * You can pass your own factory instead if you want custom behavior, such as:
- * <ul>
- * <li>Setting a custom scoring model: {@link org.apache.lucene.search.IndexSearcher#setSimilarity(org.apache.lucene.search.Similarity)}
- * <li>Parallel per-segment search: {@link org.apache.lucene.search.IndexSearcher#IndexSearcher(org.apache.lucene.index.IndexReader, java.util.concurrent.ExecutorService)}
- * <li>Return custom subclasses of IndexSearcher (for example that implement distributed scoring)
- * <li>Run queries to warm your IndexSearcher before it is used. Note: when using near-realtime search
- * you may want to also {@link org.apache.lucene.index.IndexWriterConfig#setMergedSegmentWarmer(org.apache.lucene.index.IndexWriter.IndexReaderWarmer)} to warm
- * newly merged segments in the background, outside of the reopen path.
- * </ul>
- *
- * @lucene.experimental
- */
-// LUCENE MONITOR: 3.6 Remove this once 3.6 is out and use it
-public class SearcherFactory {
-    /**
-     * Returns a new IndexSearcher over the given reader.
-     */
-    public IndexSearcher newSearcher(IndexReader reader) throws IOException {
-        return new IndexSearcher(reader);
-    }
-}
--- a/src/main/java/org/elasticsearch/common/lucene/manager/SearcherManager.java
+++ b/src/main/java/org/elasticsearch/common/lucene/manager/SearcherManager.java
@@ -1,163 +0,0 @@
-package org.elasticsearch.common.lucene.manager;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.search.IndexSearcher;
-import org.apache.lucene.store.Directory;
-
-import java.io.IOException;
-
-/**
- * Utility class to safely share {@link org.apache.lucene.search.IndexSearcher} instances across multiple
- * threads, while periodically reopening. This class ensures each searcher is
- * closed only once all threads have finished using it.
- * <p/>
- * <p/>
- * Use {@link #acquire} to obtain the current searcher, and {@link #release} to
- * release it, like this:
- * <p/>
- * <pre class="prettyprint">
- * IndexSearcher s = manager.acquire();
- * try {
- * // Do searching, doc retrieval, etc. with s
- * } finally {
- * manager.release(s);
- * }
- * // Do not use s after this!
- * s = null;
- * </pre>
- * <p/>
- * <p/>
- * In addition you should periodically call {@link #maybeRefresh}. While it's
- * possible to call this just before running each query, this is discouraged
- * since it penalizes the unlucky queries that do the reopen. It's better to use
- * a separate background thread, that periodically calls maybeReopen. Finally,
- * be sure to call {@link #close} once you are done.
- *
- * @lucene.experimental
- * @see SearcherFactory
- */
-// LUCENE MONITOR: 3.6 Remove this once 3.6 is out and use it
-public final class SearcherManager extends ReferenceManager<IndexSearcher> {
-
-    private final SearcherFactory searcherFactory;
-
-    /**
-     * Creates and returns a new SearcherManager from the given {@link org.apache.lucene.index.IndexWriter}.
-     *
-     * @param writer          the IndexWriter to open the IndexReader from.
-     * @param applyAllDeletes If <code>true</code>, all buffered deletes will
-     *                        be applied (made visible) in the {@link org.apache.lucene.search.IndexSearcher} / {@link org.apache.lucene.index.IndexReader}.
-     *                        If <code>false</code>, the deletes may or may not be applied, but remain buffered
-     *                        (in IndexWriter) so that they will be applied in the future.
-     *                        Applying deletes can be costly, so if your app can tolerate deleted documents
-     *                        being returned you might gain some performance by passing <code>false</code>.
-     *                        See {@link org.apache.lucene.index.IndexReader#openIfChanged(org.apache.lucene.index.IndexReader, org.apache.lucene.index.IndexWriter, boolean)}.
-     * @param searcherFactory An optional {@link SearcherFactory}. Pass
-     *                        <code>null</code> if you don't require the searcher to be warmed
-     *                        before going live or other custom behavior.
-     * @throws java.io.IOException
-     */
-    public SearcherManager(IndexWriter writer, boolean applyAllDeletes, SearcherFactory searcherFactory) throws IOException {
-        if (searcherFactory == null) {
-            searcherFactory = new SearcherFactory();
-        }
-        this.searcherFactory = searcherFactory;
-        current = getSearcher(searcherFactory, IndexReader.open(writer, applyAllDeletes));
-    }
-
-    /**
-     * Creates and returns a new SearcherManager from the given {@link org.apache.lucene.store.Directory}.
-     *
-     * @param dir             the directory to open the DirectoryReader on.
-     * @param searcherFactory An optional {@link SearcherFactory}. Pass
-     *                        <code>null</code> if you don't require the searcher to be warmed
-     *                        before going live or other custom behavior.
-     * @throws java.io.IOException
-     */
-    public SearcherManager(Directory dir, SearcherFactory searcherFactory) throws IOException {
-        if (searcherFactory == null) {
-            searcherFactory = new SearcherFactory();
-        }
-        this.searcherFactory = searcherFactory;
-        current = getSearcher(searcherFactory, IndexReader.open(dir));
-    }
-
-    @Override
-    protected void decRef(IndexSearcher reference) throws IOException {
-        reference.getIndexReader().decRef();
-    }
-
-    @Override
-    protected IndexSearcher refreshIfNeeded(IndexSearcher referenceToRefresh) throws IOException {
-        final IndexReader newReader = IndexReader.openIfChanged(referenceToRefresh.getIndexReader());
-        if (newReader == null) {
-            return null;
-        } else {
-            return getSearcher(searcherFactory, newReader);
-        }
-    }
-
-    @Override
-    protected boolean tryIncRef(IndexSearcher reference) {
-        return reference.getIndexReader().tryIncRef();
-    }
-
-    /**
-     * @deprecated see {@link #maybeRefresh()}.
-     */
-    @Deprecated
-    public boolean maybeReopen() throws IOException {
-        return maybeRefresh();
-    }
-
-    /**
-     * Returns <code>true</code> if no changes have occured since this searcher
-     * ie. reader was opened, otherwise <code>false</code>.
-     *
-     * @see org.apache.lucene.index.IndexReader#isCurrent()
-     */
-    public boolean isSearcherCurrent() throws IOException {
-        final IndexSearcher searcher = acquire();
-        try {
-            return searcher.getIndexReader().isCurrent();
-        } finally {
-            release(searcher);
-        }
-    }
-
-    // NOTE: decRefs incoming reader on throwing an exception
-    static IndexSearcher getSearcher(SearcherFactory searcherFactory, IndexReader reader) throws IOException {
-        boolean success = false;
-        final IndexSearcher searcher;
-        try {
-            searcher = searcherFactory.newSearcher(reader);
-            if (searcher.getIndexReader() != reader) {
-                throw new IllegalStateException("SearcherFactory must wrap exactly the provided reader (got " + searcher.getIndexReader() + " but expected " + reader + ")");
-            }
-            success = true;
-        } finally {
-            if (!success) {
-                reader.decRef();
-            }
-        }
-        return searcher;
-    }
-}
--- a/src/main/java/org/elasticsearch/index/analysis/HtmlStripCharFilterFactory.java
+++ b/src/main/java/org/elasticsearch/index/analysis/HtmlStripCharFilterFactory.java
@@ -21,9 +21,9 @@ package org.elasticsearch.index.analysis;

 import com.google.common.collect.ImmutableSet;
 import org.apache.lucene.analysis.CharStream;
+import org.apache.lucene.analysis.charfilter.HTMLStripCharFilter;
 import org.elasticsearch.common.inject.Inject;
 import org.elasticsearch.common.inject.assistedinject.Assisted;
-import org.elasticsearch.common.lucene.analysis.HTMLStripCharFilter;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.index.Index;
 import org.elasticsearch.index.settings.IndexSettings;
@@ -35,12 +35,9 @@ public class HtmlStripCharFilterFactory extends AbstractCharFilterFactory {

    private final ImmutableSet<String> escapedTags;

-    private final int readAheadLimit;
-
    @Inject
    public HtmlStripCharFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
        super(index, indexSettings, name);
-        this.readAheadLimit = settings.getAsInt("read_ahead", HTMLStripCharFilter.DEFAULT_READ_AHEAD);
        String[] escapedTags = settings.getAsArray("escaped_tags");
        if (escapedTags.length > 0) {
            this.escapedTags = ImmutableSet.copyOf(escapedTags);
@@ -53,12 +50,8 @@ public class HtmlStripCharFilterFactory extends AbstractCharFilterFactory {
        return escapedTags;
    }

-    public int readAheadLimit() {
-        return readAheadLimit;
-    }
-
    @Override
    public CharStream create(CharStream tokenStream) {
-        return new HTMLStripCharFilter(tokenStream, escapedTags, readAheadLimit);
+        return new HTMLStripCharFilter(tokenStream, escapedTags);
    }
 }
--- a/src/main/java/org/elasticsearch/index/cache/bloom/simple/SimpleBloomCache.java
+++ b/src/main/java/org/elasticsearch/index/cache/bloom/simple/SimpleBloomCache.java
@@ -19,10 +19,7 @@

 package org.elasticsearch.index.cache.bloom.simple;

-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermDocs;
-import org.apache.lucene.index.TermEnum;
+import org.apache.lucene.index.*;
 import org.apache.lucene.store.AlreadyClosedException;
 import org.apache.lucene.util.StringHelper;
 import org.apache.lucene.util.UnicodeUtil;
@@ -48,7 +45,7 @@ import java.util.concurrent.atomic.AtomicBoolean;
 /**
 *
 */
-public class SimpleBloomCache extends AbstractIndexComponent implements BloomCache, IndexReader.ReaderFinishedListener {
+public class SimpleBloomCache extends AbstractIndexComponent implements BloomCache, SegmentReader.CoreClosedListener {

    private final ThreadPool threadPool;

@@ -78,8 +75,8 @@ public class SimpleBloomCache extends AbstractIndexComponent implements BloomCac
    }

    @Override
-    public void finished(IndexReader reader) {
-        clear(reader);
+    public void onClose(SegmentReader owner) {
+        clear(owner);
    }

    @Override
@@ -126,7 +123,7 @@ public class SimpleBloomCache extends AbstractIndexComponent implements BloomCac
            synchronized (creationMutex) {
                fieldCache = cache.get(reader.getCoreCacheKey());
                if (fieldCache == null) {
-                    reader.addReaderFinishedListener(this);
+                    ((SegmentReader) reader).addCoreClosedListener(this);
                    fieldCache = ConcurrentCollections.newConcurrentMap();
                    cache.put(reader.getCoreCacheKey(), fieldCache);
                }
--- a/src/main/java/org/elasticsearch/index/cache/field/data/support/AbstractConcurrentMapFieldDataCache.java
+++ b/src/main/java/org/elasticsearch/index/cache/field/data/support/AbstractConcurrentMapFieldDataCache.java
@@ -21,6 +21,7 @@ package org.elasticsearch.index.cache.field.data.support;

 import com.google.common.cache.Cache;
 import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.SegmentReader;
 import org.elasticsearch.ElasticSearchException;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.common.util.concurrent.ConcurrentCollections;
@@ -38,7 +39,7 @@ import java.util.concurrent.ConcurrentMap;
 /**
 *
 */
-public abstract class AbstractConcurrentMapFieldDataCache extends AbstractIndexComponent implements FieldDataCache, IndexReader.ReaderFinishedListener {
+public abstract class AbstractConcurrentMapFieldDataCache extends AbstractIndexComponent implements FieldDataCache, SegmentReader.CoreClosedListener {

    private final ConcurrentMap<Object, Cache<String, FieldData>> cache;

@@ -67,8 +68,8 @@ public abstract class AbstractConcurrentMapFieldDataCache extends AbstractIndexC
    }

    @Override
-    public void finished(IndexReader reader) {
-        clear(reader);
+    public void onClose(SegmentReader owner) {
+        clear(owner);
    }

    @Override
@@ -108,7 +109,7 @@ public abstract class AbstractConcurrentMapFieldDataCache extends AbstractIndexC
                fieldDataCache = cache.get(reader.getCoreCacheKey());
                if (fieldDataCache == null) {
                    fieldDataCache = buildFieldDataMap();
-                    reader.addReaderFinishedListener(this);
+                    ((SegmentReader) reader).addCoreClosedListener(this);
                    cache.put(reader.getCoreCacheKey(), fieldDataCache);
                }
            }
--- a/src/main/java/org/elasticsearch/index/cache/filter/weighted/WeightedFilterCache.java
+++ b/src/main/java/org/elasticsearch/index/cache/filter/weighted/WeightedFilterCache.java
@@ -24,6 +24,7 @@ import com.google.common.cache.RemovalListener;
 import com.google.common.cache.RemovalNotification;
 import com.google.common.cache.Weigher;
 import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.SegmentReader;
 import org.apache.lucene.search.DocIdSet;
 import org.apache.lucene.search.Filter;
 import org.elasticsearch.ElasticSearchException;
@@ -45,7 +46,7 @@ import org.elasticsearch.indices.cache.filter.IndicesFilterCache;
 import java.io.IOException;
 import java.util.concurrent.ConcurrentMap;

-public class WeightedFilterCache extends AbstractIndexComponent implements FilterCache, IndexReader.ReaderFinishedListener, RemovalListener<WeightedFilterCache.FilterCacheKey, FilterCacheValue<DocSet>> {
+public class WeightedFilterCache extends AbstractIndexComponent implements FilterCache, SegmentReader.CoreClosedListener, RemovalListener<WeightedFilterCache.FilterCacheKey, FilterCacheValue<DocSet>> {

    final IndicesFilterCache indicesFilterCache;

@@ -91,8 +92,8 @@ public class WeightedFilterCache extends AbstractIndexComponent implements Filte
    }

    @Override
-    public void finished(IndexReader reader) {
-        clear(reader);
+    public void onClose(SegmentReader owner) {
+        clear(owner);
    }

    @Override
@@ -165,7 +166,7 @@ public class WeightedFilterCache extends AbstractIndexComponent implements Filte
                if (!cache.seenReaders.containsKey(reader.getCoreCacheKey())) {
                    Boolean previous = cache.seenReaders.putIfAbsent(reader.getCoreCacheKey(), Boolean.TRUE);
                    if (previous == null) {
-                        reader.addReaderFinishedListener(cache);
+                        ((SegmentReader) reader).addCoreClosedListener(cache);
                        cache.seenReadersCount.inc();
                    }
                }
--- a/src/main/java/org/elasticsearch/index/cache/id/simple/SimpleIdCache.java
+++ b/src/main/java/org/elasticsearch/index/cache/id/simple/SimpleIdCache.java
@@ -20,10 +20,7 @@
 package org.elasticsearch.index.cache.id.simple;

 import gnu.trove.impl.Constants;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermDocs;
-import org.apache.lucene.index.TermEnum;
+import org.apache.lucene.index.*;
 import org.apache.lucene.util.StringHelper;
 import org.elasticsearch.ElasticSearchException;
 import org.elasticsearch.common.BytesWrap;
@@ -50,7 +47,7 @@ import java.util.concurrent.ConcurrentMap;
 /**
 *
 */
-public class SimpleIdCache extends AbstractIndexComponent implements IdCache, IndexReader.ReaderFinishedListener {
+public class SimpleIdCache extends AbstractIndexComponent implements IdCache, SegmentReader.CoreClosedListener {

    private final ConcurrentMap<Object, SimpleIdReaderCache> idReaders;

@@ -71,8 +68,8 @@ public class SimpleIdCache extends AbstractIndexComponent implements IdCache, In
    }

    @Override
-    public void finished(IndexReader reader) {
-        clear(reader);
+    public void onClose(SegmentReader owner) {
+        clear(owner);
    }

    @Override
@@ -112,7 +109,7 @@ public class SimpleIdCache extends AbstractIndexComponent implements IdCache, In
                        continue;
                    }

-                    reader.addReaderFinishedListener(this);
+                    ((SegmentReader) reader).addCoreClosedListener(this);
                    HashMap<String, TypeBuilder> readerBuilder = new HashMap<String, TypeBuilder>();
                    builders.put(reader.getCoreCacheKey(), readerBuilder);

--- a/src/main/java/org/elasticsearch/index/engine/robin/RobinEngine.java
+++ b/src/main/java/org/elasticsearch/index/engine/robin/RobinEngine.java
@@ -20,9 +20,7 @@
 package org.elasticsearch.index.engine.robin;

 import org.apache.lucene.index.*;
-import org.apache.lucene.search.FilteredQuery;
-import org.apache.lucene.search.IndexSearcher;
-import org.apache.lucene.search.Query;
+import org.apache.lucene.search.*;
 import org.apache.lucene.store.AlreadyClosedException;
 import org.apache.lucene.util.UnicodeUtil;
 import org.elasticsearch.ElasticSearchException;
@ -33,8 +31,6 @@ import org.elasticsearch.common.bloom.BloomFilter;
 import org.elasticsearch.common.collect.MapBuilder;
 import org.elasticsearch.common.inject.Inject;
 import org.elasticsearch.common.lucene.Lucene;
-import org.elasticsearch.common.lucene.manager.SearcherFactory;
-import org.elasticsearch.common.lucene.manager.SearcherManager;
 import org.elasticsearch.common.lucene.uid.UidField;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.common.unit.ByteSizeUnit;
--- a/src/main/java/org/elasticsearch/indices/analysis/IndicesAnalysisService.java
+++ b/src/main/java/org/elasticsearch/indices/analysis/IndicesAnalysisService.java
@ -26,6 +26,7 @@ import org.apache.lucene.analysis.bg.BulgarianAnalyzer;
 import org.apache.lucene.analysis.br.BrazilianAnalyzer;
 import org.apache.lucene.analysis.br.BrazilianStemFilter;
 import org.apache.lucene.analysis.ca.CatalanAnalyzer;
+import org.apache.lucene.analysis.charfilter.HTMLStripCharFilter;
 import org.apache.lucene.analysis.cjk.CJKAnalyzer;
 import org.apache.lucene.analysis.cn.ChineseAnalyzer;
 import org.apache.lucene.analysis.cz.CzechAnalyzer;
@ -43,6 +44,7 @@ import org.apache.lucene.analysis.fi.FinnishAnalyzer;
 import org.apache.lucene.analysis.fr.ElisionFilter;
 import org.apache.lucene.analysis.fr.FrenchAnalyzer;
 import org.apache.lucene.analysis.fr.FrenchStemFilter;
+import org.apache.lucene.analysis.ga.IrishAnalyzer;
 import org.apache.lucene.analysis.gl.GalicianAnalyzer;
 import org.apache.lucene.analysis.hi.HindiAnalyzer;
 import org.apache.lucene.analysis.hu.HungarianAnalyzer;
@ -76,7 +78,6 @@ import org.elasticsearch.ElasticSearchIllegalStateException;
 import org.elasticsearch.common.component.AbstractComponent;
 import org.elasticsearch.common.inject.Inject;
 import org.elasticsearch.common.lucene.Lucene;
-import org.elasticsearch.common.lucene.analysis.HTMLStripCharFilter;
 import org.elasticsearch.common.regex.Regex;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.common.util.concurrent.ConcurrentCollections;
@ -141,6 +142,7 @@ public class IndicesAnalysisService extends AbstractComponent {
        analyzerProviderFactories.put("hindi", new PreBuiltAnalyzerProviderFactory("hindi", AnalyzerScope.INDICES, new HindiAnalyzer(Lucene.ANALYZER_VERSION)));
        analyzerProviderFactories.put("hungarian", new PreBuiltAnalyzerProviderFactory("hungarian", AnalyzerScope.INDICES, new HungarianAnalyzer(Lucene.ANALYZER_VERSION)));
        analyzerProviderFactories.put("indonesian", new PreBuiltAnalyzerProviderFactory("indonesian", AnalyzerScope.INDICES, new IndonesianAnalyzer(Lucene.ANALYZER_VERSION)));
+        analyzerProviderFactories.put("irish", new PreBuiltAnalyzerProviderFactory("irish", AnalyzerScope.INDICES, new IrishAnalyzer(Lucene.ANALYZER_VERSION)));
        analyzerProviderFactories.put("italian", new PreBuiltAnalyzerProviderFactory("italian", AnalyzerScope.INDICES, new ItalianAnalyzer(Lucene.ANALYZER_VERSION)));
        analyzerProviderFactories.put("latvian", new PreBuiltAnalyzerProviderFactory("latvian", AnalyzerScope.INDICES, new LatvianAnalyzer(Lucene.ANALYZER_VERSION)));
        analyzerProviderFactories.put("norwegian", new PreBuiltAnalyzerProviderFactory("norwegian", AnalyzerScope.INDICES, new NorwegianAnalyzer(Lucene.ANALYZER_VERSION)));
--- a/src/test/java/org/elasticsearch/test/integration/search/highlight/HighlighterSearchTests.java
+++ b/src/test/java/org/elasticsearch/test/integration/search/highlight/HighlighterSearchTests.java
@ -162,7 +162,7 @@ public class HighlighterSearchTests extends AbstractNodesTests {
        assertThat(search.hits().hits().length, equalTo(5));

        for (SearchHit hit : search.hits()) {
-            assertThat(hit.highlightFields().get("title").fragments()[0], equalTo("This is a test on the highlighting <em>bug</em> present in elasticsearch "));
+            assertThat(hit.highlightFields().get("title").fragments()[0], equalTo("This is a test on the highlighting <em>bug</em> present in elasticsearch"));
        }

        search = client.prepareSearch()
@ -176,7 +176,7 @@ public class HighlighterSearchTests extends AbstractNodesTests {
        assertThat(search.hits().hits().length, equalTo(5));

        for (SearchHit hit : search.hits()) {
-            assertThat(hit.highlightFields().get("attachments.body").fragments()[0], equalTo("<em>attachment</em> 1 <em>attachment</em> 2 "));
+            assertThat(hit.highlightFields().get("attachments.body").fragments()[0], equalTo("<em>attachment</em> 1 <em>attachment</em> 2"));
        }
    }

@ -267,8 +267,7 @@ public class HighlighterSearchTests extends AbstractNodesTests {
        assertThat("Failures " + Arrays.toString(searchResponse.shardFailures()), searchResponse.shardFailures().length, equalTo(0));
        assertThat(searchResponse.hits().totalHits(), equalTo(1l));

-        // LUCENE 3.1 UPGRADE: Caused adding the space at the end...
-        assertThat(searchResponse.hits().getAt(0).highlightFields().get("field1").fragments()[0], equalTo("this is a <xxx>test</xxx> "));
+        assertThat(searchResponse.hits().getAt(0).highlightFields().get("field1").fragments()[0], equalTo("this is a <xxx>test</xxx>"));

        logger.info("--> searching on _all, highlighting on field1");
        source = searchSource()
@ -281,7 +280,7 @@ public class HighlighterSearchTests extends AbstractNodesTests {
        assertThat(searchResponse.hits().totalHits(), equalTo(1l));

        // LUCENE 3.1 UPGRADE: Caused adding the space at the end...
-        assertThat(searchResponse.hits().getAt(0).highlightFields().get("field1").fragments()[0], equalTo("this is a <xxx>test</xxx> "));
+        assertThat(searchResponse.hits().getAt(0).highlightFields().get("field1").fragments()[0], equalTo("this is a <xxx>test</xxx>"));

        logger.info("--> searching on _all, highlighting on field2");
        source = searchSource()
@ -294,7 +293,7 @@ public class HighlighterSearchTests extends AbstractNodesTests {
        assertThat(searchResponse.hits().totalHits(), equalTo(1l));

        // LUCENE 3.1 UPGRADE: Caused adding the space at the end...
-        assertThat(searchResponse.hits().getAt(0).highlightFields().get("field2").fragments()[0], equalTo("The <xxx>quick</xxx> brown fox jumps over the lazy dog "));
+        assertThat(searchResponse.hits().getAt(0).highlightFields().get("field2").fragments()[0], equalTo("The <xxx>quick</xxx> brown fox jumps over the lazy dog"));

        logger.info("--> searching on _all, highlighting on field2");
        source = searchSource()
@ -307,7 +306,7 @@ public class HighlighterSearchTests extends AbstractNodesTests {
        assertThat(searchResponse.hits().totalHits(), equalTo(1l));

        // LUCENE 3.1 UPGRADE: Caused adding the space at the end...
-        assertThat(searchResponse.hits().getAt(0).highlightFields().get("field2").fragments()[0], equalTo("The <xxx>quick</xxx> brown fox jumps over the lazy dog "));
+        assertThat(searchResponse.hits().getAt(0).highlightFields().get("field2").fragments()[0], equalTo("The <xxx>quick</xxx> brown fox jumps over the lazy dog"));
    }

    @Test
@ -342,7 +341,7 @@ public class HighlighterSearchTests extends AbstractNodesTests {
        assertThat(searchResponse.hits().hits().length, equalTo(COUNT));
        for (SearchHit hit : searchResponse.hits()) {
            // LUCENE 3.1 UPGRADE: Caused adding the space at the end...
-            assertThat(hit.highlightFields().get("field1").fragments()[0], equalTo("<em>test</em> " + hit.id() + " "));
+            assertThat(hit.highlightFields().get("field1").fragments()[0], equalTo("<em>test</em> " + hit.id()));
        }

        logger.info("--> searching explicitly on field1 and highlighting on it, with DFS");
@ -355,8 +354,7 @@ public class HighlighterSearchTests extends AbstractNodesTests {
        assertThat(searchResponse.hits().totalHits(), equalTo((long) COUNT));
        assertThat(searchResponse.hits().hits().length, equalTo(COUNT));
        for (SearchHit hit : searchResponse.hits()) {
-            // LUCENE 3.1 UPGRADE: Caused adding the space at the end...
-            assertThat(hit.highlightFields().get("field1").fragments()[0], equalTo("<em>test</em> " + hit.id() + " "));
+            assertThat(hit.highlightFields().get("field1").fragments()[0], equalTo("<em>test</em> " + hit.id()));
        }

        logger.info("--> searching explicitly _all and highlighting on _all");
@ -368,8 +366,7 @@ public class HighlighterSearchTests extends AbstractNodesTests {
        assertThat(searchResponse.hits().totalHits(), equalTo((long) COUNT));
        assertThat(searchResponse.hits().hits().length, equalTo(COUNT));
        for (SearchHit hit : searchResponse.hits()) {
-            // LUCENE 3.1 UPGRADE: Caused adding the space at the end...
-            assertThat(hit.highlightFields().get("_all").fragments()[0], equalTo("<em>test</em> " + hit.id() + "  "));
+            assertThat(hit.highlightFields().get("_all").fragments()[0], equalTo("<em>test</em> " + hit.id() + " "));
        }
    }

@ -413,7 +410,7 @@ public class HighlighterSearchTests extends AbstractNodesTests {

        for (SearchHit hit : search.hits()) {
            // LUCENE 3.1 UPGRADE: Caused adding the space at the end...
-            assertThat(hit.highlightFields().get("title").fragments()[0], equalTo("This is a test on the highlighting <em>bug</em> present in elasticsearch "));
+            assertThat(hit.highlightFields().get("title").fragments()[0], equalTo("This is a test on the highlighting <em>bug</em> present in elasticsearch"));
        }
    }

@ -448,7 +445,7 @@ public class HighlighterSearchTests extends AbstractNodesTests {

        for (SearchHit hit : search.hits()) {
            // LUCENE 3.1 UPGRADE: Caused adding the space at the end...
-            assertThat(hit.highlightFields().get("title").fragments()[0], equalTo("highlighting <em>bug</em> present in elasticsearch "));
+            assertThat(hit.highlightFields().get("title").fragments()[0], equalTo("highlighting <em>bug</em> present in elasticsearch"));
        }
    }

@ -523,8 +520,7 @@ public class HighlighterSearchTests extends AbstractNodesTests {
        assertThat(search.hits().hits().length, equalTo(5));

        for (SearchHit hit : search.hits()) {
-            // LUCENE 3.1 UPGRADE: Caused adding the space at the end...
-            assertThat(hit.highlightFields().get("title").fragments()[0], equalTo("highlighting <em>test</em> for *&amp;? elasticsearch "));
+            assertThat(hit.highlightFields().get("title").fragments()[0], equalTo("highlighting <em>test</em> for *&amp;? elasticsearch"));
        }
    }

@ -555,7 +551,7 @@ public class HighlighterSearchTests extends AbstractNodesTests {
        assertThat(Arrays.toString(search.shardFailures()), search.failedShards(), equalTo(0));

        SearchHit hit = search.hits().getAt(0);
-        assertThat(hit.highlightFields().get("title").fragments()[0], equalTo("this is a <em>test</em> "));
+        assertThat(hit.highlightFields().get("title").fragments()[0], equalTo("this is a <em>test</em>"));

        // search on title.key and highlight on title
        search = client.prepareSearch()
@ -566,7 +562,7 @@ public class HighlighterSearchTests extends AbstractNodesTests {
        assertThat(Arrays.toString(search.shardFailures()), search.failedShards(), equalTo(0));

        hit = search.hits().getAt(0);
-        assertThat(hit.highlightFields().get("title.key").fragments()[0], equalTo("<em>this</em> <em>is</em> <em>a</em> <em>test</em> "));
+        assertThat(hit.highlightFields().get("title.key").fragments()[0], equalTo("<em>this</em> <em>is</em> <em>a</em> <em>test</em>"));
    }

    @Test
@ -596,7 +592,7 @@ public class HighlighterSearchTests extends AbstractNodesTests {
        assertThat(Arrays.toString(search.shardFailures()), search.failedShards(), equalTo(0));

        SearchHit hit = search.hits().getAt(0);
-        assertThat(hit.highlightFields().get("title").fragments()[0], equalTo("this is a <em>test</em> "));
+        assertThat(hit.highlightFields().get("title").fragments()[0], equalTo("this is a <em>test</em>"));

        // search on title.key and highlight on title.key
        search = client.prepareSearch()
@ -607,7 +603,7 @@ public class HighlighterSearchTests extends AbstractNodesTests {
        assertThat(Arrays.toString(search.shardFailures()), search.failedShards(), equalTo(0));

        hit = search.hits().getAt(0);
-        assertThat(hit.highlightFields().get("title.key").fragments()[0], equalTo("<em>this</em> <em>is</em> <em>a</em> <em>test</em> "));
+        assertThat(hit.highlightFields().get("title.key").fragments()[0], equalTo("<em>this</em> <em>is</em> <em>a</em> <em>test</em>"));
    }

    @Test
--- a/src/test/java/org/elasticsearch/test/unit/deps/lucene/VectorHighlighterTests.java
+++ b/src/test/java/org/elasticsearch/test/unit/deps/lucene/VectorHighlighterTests.java
@ -60,7 +60,7 @@ public class VectorHighlighterTests {
        String fragment = highlighter.getBestFragment(highlighter.getFieldQuery(new TermQuery(new Term("content", "bad"))),
                reader, topDocs.scoreDocs[0].doc, "content", 30);
        assertThat(fragment, notNullValue());
-        assertThat(fragment, equalTo("e big <b>bad</b> dog "));
+        assertThat(fragment, equalTo("the big <b>bad</b> dog"));
    }

    @Test