Upgrade to Lucene 3.6, closes #1862.
This commit is contained in:
parent
bf10ef5b4b
commit
16cd159a38
2
pom.xml
2
pom.xml
|
@ -30,7 +30,7 @@
|
|||
</parent>
|
||||
|
||||
<properties>
|
||||
<lucene.version>3.5.0</lucene.version>
|
||||
<lucene.version>3.6.0</lucene.version>
|
||||
</properties>
|
||||
|
||||
<repositories>
|
||||
|
|
|
@ -46,10 +46,10 @@ import org.apache.lucene.search.*;
|
|||
import org.apache.lucene.store.RAMDirectory;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.Constants;
|
||||
import org.elasticsearch.common.io.FastStringReader;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Serializable;
|
||||
import java.io.StringReader;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
|
@ -201,6 +201,8 @@ public class CustomMemoryIndex implements Serializable {
|
|||
|
||||
private static final boolean DEBUG = false;
|
||||
|
||||
private final FieldInfos fieldInfos;
|
||||
|
||||
/**
|
||||
* Sorts term entries into ascending order; also works for
|
||||
* Arrays.binarySearch() and Arrays.sort()
|
||||
|
@ -234,6 +236,7 @@ public class CustomMemoryIndex implements Serializable {
|
|||
*/
|
||||
private CustomMemoryIndex(boolean storeOffsets) {
|
||||
this.stride = storeOffsets ? 3 : 1;
|
||||
fieldInfos = new FieldInfos();
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -257,8 +260,12 @@ public class CustomMemoryIndex implements Serializable {
|
|||
if (analyzer == null)
|
||||
throw new IllegalArgumentException("analyzer must not be null");
|
||||
|
||||
TokenStream stream = analyzer.tokenStream(fieldName,
|
||||
new StringReader(text));
|
||||
TokenStream stream;
|
||||
try {
|
||||
stream = analyzer.reusableTokenStream(fieldName, new FastStringReader(text));
|
||||
} catch (IOException ex) {
|
||||
throw new RuntimeException(ex);
|
||||
}
|
||||
|
||||
addField(fieldName, stream);
|
||||
}
|
||||
|
@ -338,6 +345,8 @@ public class CustomMemoryIndex implements Serializable {
|
|||
int numOverlapTokens = 0;
|
||||
int pos = -1;
|
||||
|
||||
fieldInfos.add(fieldName, true, true);
|
||||
|
||||
// CHANGE
|
||||
if (fields.get(fieldName) != null) {
|
||||
Info info = fields.get(fieldName);
|
||||
|
@ -760,13 +769,12 @@ public class CustomMemoryIndex implements Serializable {
|
|||
* Search support for Lucene framework integration; implements all methods
|
||||
* required by the Lucene IndexReader contracts.
|
||||
*/
|
||||
private final class MemoryIndexReader extends IndexReader {
|
||||
final class MemoryIndexReader extends IndexReader {
|
||||
|
||||
private Searcher searcher; // needed to find searcher.getSimilarity()
|
||||
|
||||
private MemoryIndexReader() {
|
||||
super(); // avoid as much superclass baggage as possible
|
||||
readerFinishedListeners = Collections.synchronizedSet(new HashSet<ReaderFinishedListener>());
|
||||
}
|
||||
|
||||
private Info getInfo(String fieldName) {
|
||||
|
@ -1174,12 +1182,6 @@ public class CustomMemoryIndex implements Serializable {
|
|||
return 1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Document document(int n) {
|
||||
if (DEBUG) System.err.println("MemoryIndexReader.document");
|
||||
return new Document(); // there are no stored fields
|
||||
}
|
||||
|
||||
//When we convert to JDK 1.5 make this Set<String>
|
||||
@Override
|
||||
public Document document(int n, FieldSelector fieldSelector) throws IOException {
|
||||
|
@ -1219,20 +1221,9 @@ public class CustomMemoryIndex implements Serializable {
|
|||
if (DEBUG) System.err.println("MemoryIndexReader.doClose");
|
||||
}
|
||||
|
||||
// lucene >= 1.9 (remove this method for lucene-1.4.3)
|
||||
@Override
|
||||
public Collection<String> getFieldNames(FieldOption fieldOption) {
|
||||
if (DEBUG) System.err.println("MemoryIndexReader.getFieldNamesOption");
|
||||
if (fieldOption == FieldOption.UNINDEXED)
|
||||
return Collections.<String>emptySet();
|
||||
if (fieldOption == FieldOption.INDEXED_NO_TERMVECTOR)
|
||||
return Collections.<String>emptySet();
|
||||
if (fieldOption == FieldOption.TERMVECTOR_WITH_OFFSET && stride == 1)
|
||||
return Collections.<String>emptySet();
|
||||
if (fieldOption == FieldOption.TERMVECTOR_WITH_POSITION_OFFSET && stride == 1)
|
||||
return Collections.<String>emptySet();
|
||||
|
||||
return Collections.unmodifiableSet(fields.keySet());
|
||||
public FieldInfos getFieldInfos() {
|
||||
return fieldInfos;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -40,7 +40,7 @@ import java.lang.reflect.Field;
|
|||
*/
|
||||
public class Lucene {
|
||||
|
||||
public static final Version VERSION = Version.LUCENE_35;
|
||||
public static final Version VERSION = Version.LUCENE_36;
|
||||
public static final Version ANALYZER_VERSION = VERSION;
|
||||
public static final Version QUERYPARSER_VERSION = VERSION;
|
||||
|
||||
|
@ -55,6 +55,9 @@ public class Lucene {
|
|||
if (version == null) {
|
||||
return defaultVersion;
|
||||
}
|
||||
if ("3.6".equals(version)) {
|
||||
return Version.LUCENE_36;
|
||||
}
|
||||
if ("3.5".equals(version)) {
|
||||
return Version.LUCENE_35;
|
||||
}
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1,181 +0,0 @@
|
|||
package org.elasticsearch.common.lucene.manager;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.store.AlreadyClosedException;
|
||||
|
||||
import java.io.Closeable;
|
||||
import java.io.IOException;
|
||||
import java.util.concurrent.Semaphore;
|
||||
|
||||
/**
|
||||
* Utility class to safely share instances of a certain type across multiple
|
||||
* threads, while periodically refreshing them. This class ensures each
|
||||
* reference is closed only once all threads have finished using it. It is
|
||||
* recommended to consult the documentation of {@link ReferenceManager}
|
||||
* implementations for their {@link #maybeRefresh()} semantics.
|
||||
*
|
||||
* @param <G> the concrete type that will be {@link #acquire() acquired} and
|
||||
* {@link #release(Object) released}.
|
||||
* @lucene.experimental
|
||||
*/
|
||||
// LUCENE MONITOR: 3.6 Remove this once 3.6 is out and use it
|
||||
public abstract class ReferenceManager<G> implements Closeable {
|
||||
|
||||
private static final String REFERENCE_MANAGER_IS_CLOSED_MSG = "this ReferenceManager is closed";
|
||||
|
||||
protected volatile G current;
|
||||
|
||||
private final Semaphore reopenLock = new Semaphore(1);
|
||||
|
||||
private void ensureOpen() {
|
||||
if (current == null) {
|
||||
throw new AlreadyClosedException(REFERENCE_MANAGER_IS_CLOSED_MSG);
|
||||
}
|
||||
}
|
||||
|
||||
private synchronized void swapReference(G newReference) throws IOException {
|
||||
ensureOpen();
|
||||
final G oldReference = current;
|
||||
current = newReference;
|
||||
release(oldReference);
|
||||
}
|
||||
|
||||
/**
|
||||
* Decrement reference counting on the given reference.
|
||||
*/
|
||||
protected abstract void decRef(G reference) throws IOException;
|
||||
|
||||
/**
|
||||
* Refresh the given reference if needed. Returns {@code null} if no refresh
|
||||
* was needed, otherwise a new refreshed reference.
|
||||
*/
|
||||
protected abstract G refreshIfNeeded(G referenceToRefresh) throws IOException;
|
||||
|
||||
/**
|
||||
* Try to increment reference counting on the given reference. Return true if
|
||||
* the operation was successful.
|
||||
*/
|
||||
protected abstract boolean tryIncRef(G reference);
|
||||
|
||||
/**
|
||||
* Obtain the current reference. You must match every call to acquire with one
|
||||
* call to {@link #release}; it's best to do so in a finally clause, and set
|
||||
* the reference to {@code null} to prevent accidental usage after it has been
|
||||
* released.
|
||||
*/
|
||||
public final G acquire() {
|
||||
G ref;
|
||||
do {
|
||||
if ((ref = current) == null) {
|
||||
throw new AlreadyClosedException(REFERENCE_MANAGER_IS_CLOSED_MSG);
|
||||
}
|
||||
} while (!tryIncRef(ref));
|
||||
return ref;
|
||||
}
|
||||
|
||||
/**
|
||||
* Close this ReferenceManager to future {@link #acquire() acquiring}. Any
|
||||
* references that were previously {@link #acquire() acquired} won't be
|
||||
* affected, and they should still be {@link #release released} when they are
|
||||
* not needed anymore.
|
||||
*/
|
||||
public final synchronized void close() throws IOException {
|
||||
if (current != null) {
|
||||
// make sure we can call this more than once
|
||||
// closeable javadoc says:
|
||||
// if this is already closed then invoking this method has no effect.
|
||||
swapReference(null);
|
||||
afterClose();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Called after close(), so subclass can free any resources.
|
||||
*/
|
||||
protected void afterClose() throws IOException {
|
||||
}
|
||||
|
||||
/**
|
||||
* You must call this, periodically, if you want that {@link #acquire()} will
|
||||
* return refreshed instances.
|
||||
* <p/>
|
||||
* <p/>
|
||||
* <b>Threads</b>: it's fine for more than one thread to call this at once.
|
||||
* Only the first thread will attempt the refresh; subsequent threads will see
|
||||
* that another thread is already handling refresh and will return
|
||||
* immediately. Note that this means if another thread is already refreshing
|
||||
* then subsequent threads will return right away without waiting for the
|
||||
* refresh to complete.
|
||||
* <p/>
|
||||
* <p/>
|
||||
* If this method returns true it means the calling thread either refreshed
|
||||
* or that there were no changes to refresh. If it returns false it means another
|
||||
* thread is currently refreshing.
|
||||
*/
|
||||
public final boolean maybeRefresh() throws IOException {
|
||||
ensureOpen();
|
||||
|
||||
// Ensure only 1 thread does reopen at once; other threads just return immediately:
|
||||
final boolean doTryRefresh = reopenLock.tryAcquire();
|
||||
if (doTryRefresh) {
|
||||
try {
|
||||
final G reference = acquire();
|
||||
try {
|
||||
G newReference = refreshIfNeeded(reference);
|
||||
if (newReference != null) {
|
||||
assert newReference != reference : "refreshIfNeeded should return null if refresh wasn't needed";
|
||||
boolean success = false;
|
||||
try {
|
||||
swapReference(newReference);
|
||||
success = true;
|
||||
} finally {
|
||||
if (!success) {
|
||||
release(newReference);
|
||||
}
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
release(reference);
|
||||
}
|
||||
afterRefresh();
|
||||
} finally {
|
||||
reopenLock.release();
|
||||
}
|
||||
}
|
||||
|
||||
return doTryRefresh;
|
||||
}
|
||||
|
||||
/**
|
||||
* Called after swapReference has installed a new
|
||||
* instance.
|
||||
*/
|
||||
protected void afterRefresh() throws IOException {
|
||||
}
|
||||
|
||||
/**
|
||||
* Release the refernce previously obtained via {@link #acquire()}.
|
||||
* <p/>
|
||||
* <b>NOTE:</b> it's safe to call this after {@link #close()}.
|
||||
*/
|
||||
public final void release(G reference) throws IOException {
|
||||
assert reference != null;
|
||||
decRef(reference);
|
||||
}
|
||||
}
|
|
@ -1,56 +0,0 @@
|
|||
package org.elasticsearch.common.lucene.manager;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* Factory class used by {@link org.apache.lucene.search.SearcherManager} and {@link org.apache.lucene.search.NRTManager} to
|
||||
* create new IndexSearchers. The default implementation just creates
|
||||
* an IndexSearcher with no custom behavior:
|
||||
* <p/>
|
||||
* <pre class="prettyprint">
|
||||
* public IndexSearcher newSearcher(IndexReader r) throws IOException {
|
||||
* return new IndexSearcher(r);
|
||||
* }
|
||||
* </pre>
|
||||
* <p/>
|
||||
* You can pass your own factory instead if you want custom behavior, such as:
|
||||
* <ul>
|
||||
* <li>Setting a custom scoring model: {@link org.apache.lucene.search.IndexSearcher#setSimilarity(org.apache.lucene.search.Similarity)}
|
||||
* <li>Parallel per-segment search: {@link org.apache.lucene.search.IndexSearcher#IndexSearcher(org.apache.lucene.index.IndexReader, java.util.concurrent.ExecutorService)}
|
||||
* <li>Return custom subclasses of IndexSearcher (for example that implement distributed scoring)
|
||||
* <li>Run queries to warm your IndexSearcher before it is used. Note: when using near-realtime search
|
||||
* you may want to also {@link org.apache.lucene.index.IndexWriterConfig#setMergedSegmentWarmer(org.apache.lucene.index.IndexWriter.IndexReaderWarmer)} to warm
|
||||
* newly merged segments in the background, outside of the reopen path.
|
||||
* </ul>
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
// LUCENE MONITOR: 3.6 Remove this once 3.6 is out and use it
|
||||
public class SearcherFactory {
|
||||
/**
|
||||
* Returns a new IndexSearcher over the given reader.
|
||||
*/
|
||||
public IndexSearcher newSearcher(IndexReader reader) throws IOException {
|
||||
return new IndexSearcher(reader);
|
||||
}
|
||||
}
|
|
@ -1,163 +0,0 @@
|
|||
package org.elasticsearch.common.lucene.manager;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.store.Directory;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* Utility class to safely share {@link org.apache.lucene.search.IndexSearcher} instances across multiple
|
||||
* threads, while periodically reopening. This class ensures each searcher is
|
||||
* closed only once all threads have finished using it.
|
||||
* <p/>
|
||||
* <p/>
|
||||
* Use {@link #acquire} to obtain the current searcher, and {@link #release} to
|
||||
* release it, like this:
|
||||
* <p/>
|
||||
* <pre class="prettyprint">
|
||||
* IndexSearcher s = manager.acquire();
|
||||
* try {
|
||||
* // Do searching, doc retrieval, etc. with s
|
||||
* } finally {
|
||||
* manager.release(s);
|
||||
* }
|
||||
* // Do not use s after this!
|
||||
* s = null;
|
||||
* </pre>
|
||||
* <p/>
|
||||
* <p/>
|
||||
* In addition you should periodically call {@link #maybeRefresh}. While it's
|
||||
* possible to call this just before running each query, this is discouraged
|
||||
* since it penalizes the unlucky queries that do the reopen. It's better to use
|
||||
* a separate background thread, that periodically calls maybeRefresh. Finally,
|
||||
* be sure to call {@link #close} once you are done.
|
||||
*
|
||||
* @lucene.experimental
|
||||
* @see SearcherFactory
|
||||
*/
|
||||
// LUCENE MONITOR: 3.6 Remove this once 3.6 is out and use it
|
||||
public final class SearcherManager extends ReferenceManager<IndexSearcher> {
|
||||
|
||||
private final SearcherFactory searcherFactory;
|
||||
|
||||
/**
|
||||
* Creates and returns a new SearcherManager from the given {@link org.apache.lucene.index.IndexWriter}.
|
||||
*
|
||||
* @param writer the IndexWriter to open the IndexReader from.
|
||||
* @param applyAllDeletes If <code>true</code>, all buffered deletes will
|
||||
* be applied (made visible) in the {@link org.apache.lucene.search.IndexSearcher} / {@link org.apache.lucene.index.IndexReader}.
|
||||
* If <code>false</code>, the deletes may or may not be applied, but remain buffered
|
||||
* (in IndexWriter) so that they will be applied in the future.
|
||||
* Applying deletes can be costly, so if your app can tolerate deleted documents
|
||||
* being returned you might gain some performance by passing <code>false</code>.
|
||||
* See {@link org.apache.lucene.index.IndexReader#openIfChanged(org.apache.lucene.index.IndexReader, org.apache.lucene.index.IndexWriter, boolean)}.
|
||||
* @param searcherFactory An optional {@link SearcherFactory}. Pass
|
||||
* <code>null</code> if you don't require the searcher to be warmed
|
||||
* before going live or other custom behavior.
|
||||
* @throws java.io.IOException
|
||||
*/
|
||||
public SearcherManager(IndexWriter writer, boolean applyAllDeletes, SearcherFactory searcherFactory) throws IOException {
|
||||
if (searcherFactory == null) {
|
||||
searcherFactory = new SearcherFactory();
|
||||
}
|
||||
this.searcherFactory = searcherFactory;
|
||||
current = getSearcher(searcherFactory, IndexReader.open(writer, applyAllDeletes));
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates and returns a new SearcherManager from the given {@link org.apache.lucene.store.Directory}.
|
||||
*
|
||||
* @param dir the directory to open the DirectoryReader on.
|
||||
* @param searcherFactory An optional {@link SearcherFactory}. Pass
|
||||
* <code>null</code> if you don't require the searcher to be warmed
|
||||
* before going live or other custom behavior.
|
||||
* @throws java.io.IOException
|
||||
*/
|
||||
public SearcherManager(Directory dir, SearcherFactory searcherFactory) throws IOException {
|
||||
if (searcherFactory == null) {
|
||||
searcherFactory = new SearcherFactory();
|
||||
}
|
||||
this.searcherFactory = searcherFactory;
|
||||
current = getSearcher(searcherFactory, IndexReader.open(dir));
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void decRef(IndexSearcher reference) throws IOException {
|
||||
reference.getIndexReader().decRef();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected IndexSearcher refreshIfNeeded(IndexSearcher referenceToRefresh) throws IOException {
|
||||
final IndexReader newReader = IndexReader.openIfChanged(referenceToRefresh.getIndexReader());
|
||||
if (newReader == null) {
|
||||
return null;
|
||||
} else {
|
||||
return getSearcher(searcherFactory, newReader);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean tryIncRef(IndexSearcher reference) {
|
||||
return reference.getIndexReader().tryIncRef();
|
||||
}
|
||||
|
||||
/**
|
||||
* @deprecated see {@link #maybeRefresh()}.
|
||||
*/
|
||||
@Deprecated
|
||||
public boolean maybeReopen() throws IOException {
|
||||
return maybeRefresh();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns <code>true</code> if no changes have occured since this searcher
|
||||
* ie. reader was opened, otherwise <code>false</code>.
|
||||
*
|
||||
* @see org.apache.lucene.index.IndexReader#isCurrent()
|
||||
*/
|
||||
public boolean isSearcherCurrent() throws IOException {
|
||||
final IndexSearcher searcher = acquire();
|
||||
try {
|
||||
return searcher.getIndexReader().isCurrent();
|
||||
} finally {
|
||||
release(searcher);
|
||||
}
|
||||
}
|
||||
|
||||
// NOTE: decRefs incoming reader on throwing an exception
|
||||
static IndexSearcher getSearcher(SearcherFactory searcherFactory, IndexReader reader) throws IOException {
|
||||
boolean success = false;
|
||||
final IndexSearcher searcher;
|
||||
try {
|
||||
searcher = searcherFactory.newSearcher(reader);
|
||||
if (searcher.getIndexReader() != reader) {
|
||||
throw new IllegalStateException("SearcherFactory must wrap exactly the provided reader (got " + searcher.getIndexReader() + " but expected " + reader + ")");
|
||||
}
|
||||
success = true;
|
||||
} finally {
|
||||
if (!success) {
|
||||
reader.decRef();
|
||||
}
|
||||
}
|
||||
return searcher;
|
||||
}
|
||||
}
|
|
@ -21,9 +21,9 @@ package org.elasticsearch.index.analysis;
|
|||
|
||||
import com.google.common.collect.ImmutableSet;
|
||||
import org.apache.lucene.analysis.CharStream;
|
||||
import org.apache.lucene.analysis.charfilter.HTMLStripCharFilter;
|
||||
import org.elasticsearch.common.inject.Inject;
|
||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||
import org.elasticsearch.common.lucene.analysis.HTMLStripCharFilter;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.index.Index;
|
||||
import org.elasticsearch.index.settings.IndexSettings;
|
||||
|
@ -35,12 +35,9 @@ public class HtmlStripCharFilterFactory extends AbstractCharFilterFactory {
|
|||
|
||||
private final ImmutableSet<String> escapedTags;
|
||||
|
||||
private final int readAheadLimit;
|
||||
|
||||
@Inject
|
||||
public HtmlStripCharFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||
super(index, indexSettings, name);
|
||||
this.readAheadLimit = settings.getAsInt("read_ahead", HTMLStripCharFilter.DEFAULT_READ_AHEAD);
|
||||
String[] escapedTags = settings.getAsArray("escaped_tags");
|
||||
if (escapedTags.length > 0) {
|
||||
this.escapedTags = ImmutableSet.copyOf(escapedTags);
|
||||
|
@ -53,12 +50,8 @@ public class HtmlStripCharFilterFactory extends AbstractCharFilterFactory {
|
|||
return escapedTags;
|
||||
}
|
||||
|
||||
public int readAheadLimit() {
|
||||
return readAheadLimit;
|
||||
}
|
||||
|
||||
@Override
|
||||
public CharStream create(CharStream tokenStream) {
|
||||
return new HTMLStripCharFilter(tokenStream, escapedTags, readAheadLimit);
|
||||
return new HTMLStripCharFilter(tokenStream, escapedTags);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -19,10 +19,7 @@
|
|||
|
||||
package org.elasticsearch.index.cache.bloom.simple;
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.TermDocs;
|
||||
import org.apache.lucene.index.TermEnum;
|
||||
import org.apache.lucene.index.*;
|
||||
import org.apache.lucene.store.AlreadyClosedException;
|
||||
import org.apache.lucene.util.StringHelper;
|
||||
import org.apache.lucene.util.UnicodeUtil;
|
||||
|
@ -48,7 +45,7 @@ import java.util.concurrent.atomic.AtomicBoolean;
|
|||
/**
|
||||
*
|
||||
*/
|
||||
public class SimpleBloomCache extends AbstractIndexComponent implements BloomCache, IndexReader.ReaderFinishedListener {
|
||||
public class SimpleBloomCache extends AbstractIndexComponent implements BloomCache, SegmentReader.CoreClosedListener {
|
||||
|
||||
private final ThreadPool threadPool;
|
||||
|
||||
|
@ -78,8 +75,8 @@ public class SimpleBloomCache extends AbstractIndexComponent implements BloomCac
|
|||
}
|
||||
|
||||
@Override
|
||||
public void finished(IndexReader reader) {
|
||||
clear(reader);
|
||||
public void onClose(SegmentReader owner) {
|
||||
clear(owner);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -126,7 +123,7 @@ public class SimpleBloomCache extends AbstractIndexComponent implements BloomCac
|
|||
synchronized (creationMutex) {
|
||||
fieldCache = cache.get(reader.getCoreCacheKey());
|
||||
if (fieldCache == null) {
|
||||
reader.addReaderFinishedListener(this);
|
||||
((SegmentReader) reader).addCoreClosedListener(this);
|
||||
fieldCache = ConcurrentCollections.newConcurrentMap();
|
||||
cache.put(reader.getCoreCacheKey(), fieldCache);
|
||||
}
|
||||
|
|
|
@ -21,6 +21,7 @@ package org.elasticsearch.index.cache.field.data.support;
|
|||
|
||||
import com.google.common.cache.Cache;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.SegmentReader;
|
||||
import org.elasticsearch.ElasticSearchException;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.common.util.concurrent.ConcurrentCollections;
|
||||
|
@ -38,7 +39,7 @@ import java.util.concurrent.ConcurrentMap;
|
|||
/**
|
||||
*
|
||||
*/
|
||||
public abstract class AbstractConcurrentMapFieldDataCache extends AbstractIndexComponent implements FieldDataCache, IndexReader.ReaderFinishedListener {
|
||||
public abstract class AbstractConcurrentMapFieldDataCache extends AbstractIndexComponent implements FieldDataCache, SegmentReader.CoreClosedListener {
|
||||
|
||||
private final ConcurrentMap<Object, Cache<String, FieldData>> cache;
|
||||
|
||||
|
@ -67,8 +68,8 @@ public abstract class AbstractConcurrentMapFieldDataCache extends AbstractIndexC
|
|||
}
|
||||
|
||||
@Override
|
||||
public void finished(IndexReader reader) {
|
||||
clear(reader);
|
||||
public void onClose(SegmentReader owner) {
|
||||
clear(owner);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -108,7 +109,7 @@ public abstract class AbstractConcurrentMapFieldDataCache extends AbstractIndexC
|
|||
fieldDataCache = cache.get(reader.getCoreCacheKey());
|
||||
if (fieldDataCache == null) {
|
||||
fieldDataCache = buildFieldDataMap();
|
||||
reader.addReaderFinishedListener(this);
|
||||
((SegmentReader) reader).addCoreClosedListener(this);
|
||||
cache.put(reader.getCoreCacheKey(), fieldDataCache);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -24,6 +24,7 @@ import com.google.common.cache.RemovalListener;
|
|||
import com.google.common.cache.RemovalNotification;
|
||||
import com.google.common.cache.Weigher;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.SegmentReader;
|
||||
import org.apache.lucene.search.DocIdSet;
|
||||
import org.apache.lucene.search.Filter;
|
||||
import org.elasticsearch.ElasticSearchException;
|
||||
|
@ -45,7 +46,7 @@ import org.elasticsearch.indices.cache.filter.IndicesFilterCache;
|
|||
import java.io.IOException;
|
||||
import java.util.concurrent.ConcurrentMap;
|
||||
|
||||
public class WeightedFilterCache extends AbstractIndexComponent implements FilterCache, IndexReader.ReaderFinishedListener, RemovalListener<WeightedFilterCache.FilterCacheKey, FilterCacheValue<DocSet>> {
|
||||
public class WeightedFilterCache extends AbstractIndexComponent implements FilterCache, SegmentReader.CoreClosedListener, RemovalListener<WeightedFilterCache.FilterCacheKey, FilterCacheValue<DocSet>> {
|
||||
|
||||
final IndicesFilterCache indicesFilterCache;
|
||||
|
||||
|
@ -91,8 +92,8 @@ public class WeightedFilterCache extends AbstractIndexComponent implements Filte
|
|||
}
|
||||
|
||||
@Override
|
||||
public void finished(IndexReader reader) {
|
||||
clear(reader);
|
||||
public void onClose(SegmentReader owner) {
|
||||
clear(owner);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -165,7 +166,7 @@ public class WeightedFilterCache extends AbstractIndexComponent implements Filte
|
|||
if (!cache.seenReaders.containsKey(reader.getCoreCacheKey())) {
|
||||
Boolean previous = cache.seenReaders.putIfAbsent(reader.getCoreCacheKey(), Boolean.TRUE);
|
||||
if (previous == null) {
|
||||
reader.addReaderFinishedListener(cache);
|
||||
((SegmentReader) reader).addCoreClosedListener(cache);
|
||||
cache.seenReadersCount.inc();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -20,10 +20,7 @@
|
|||
package org.elasticsearch.index.cache.id.simple;
|
||||
|
||||
import gnu.trove.impl.Constants;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.TermDocs;
|
||||
import org.apache.lucene.index.TermEnum;
|
||||
import org.apache.lucene.index.*;
|
||||
import org.apache.lucene.util.StringHelper;
|
||||
import org.elasticsearch.ElasticSearchException;
|
||||
import org.elasticsearch.common.BytesWrap;
|
||||
|
@ -50,7 +47,7 @@ import java.util.concurrent.ConcurrentMap;
|
|||
/**
|
||||
*
|
||||
*/
|
||||
public class SimpleIdCache extends AbstractIndexComponent implements IdCache, IndexReader.ReaderFinishedListener {
|
||||
public class SimpleIdCache extends AbstractIndexComponent implements IdCache, SegmentReader.CoreClosedListener {
|
||||
|
||||
private final ConcurrentMap<Object, SimpleIdReaderCache> idReaders;
|
||||
|
||||
|
@ -71,8 +68,8 @@ public class SimpleIdCache extends AbstractIndexComponent implements IdCache, In
|
|||
}
|
||||
|
||||
@Override
|
||||
public void finished(IndexReader reader) {
|
||||
clear(reader);
|
||||
public void onClose(SegmentReader owner) {
|
||||
clear(owner);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -112,7 +109,7 @@ public class SimpleIdCache extends AbstractIndexComponent implements IdCache, In
|
|||
continue;
|
||||
}
|
||||
|
||||
reader.addReaderFinishedListener(this);
|
||||
((SegmentReader) reader).addCoreClosedListener(this);
|
||||
HashMap<String, TypeBuilder> readerBuilder = new HashMap<String, TypeBuilder>();
|
||||
builders.put(reader.getCoreCacheKey(), readerBuilder);
|
||||
|
||||
|
|
|
@ -20,9 +20,7 @@
|
|||
package org.elasticsearch.index.engine.robin;
|
||||
|
||||
import org.apache.lucene.index.*;
|
||||
import org.apache.lucene.search.FilteredQuery;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.*;
|
||||
import org.apache.lucene.store.AlreadyClosedException;
|
||||
import org.apache.lucene.util.UnicodeUtil;
|
||||
import org.elasticsearch.ElasticSearchException;
|
||||
|
@ -33,8 +31,6 @@ import org.elasticsearch.common.bloom.BloomFilter;
|
|||
import org.elasticsearch.common.collect.MapBuilder;
|
||||
import org.elasticsearch.common.inject.Inject;
|
||||
import org.elasticsearch.common.lucene.Lucene;
|
||||
import org.elasticsearch.common.lucene.manager.SearcherFactory;
|
||||
import org.elasticsearch.common.lucene.manager.SearcherManager;
|
||||
import org.elasticsearch.common.lucene.uid.UidField;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.common.unit.ByteSizeUnit;
|
||||
|
|
|
@ -26,6 +26,7 @@ import org.apache.lucene.analysis.bg.BulgarianAnalyzer;
|
|||
import org.apache.lucene.analysis.br.BrazilianAnalyzer;
|
||||
import org.apache.lucene.analysis.br.BrazilianStemFilter;
|
||||
import org.apache.lucene.analysis.ca.CatalanAnalyzer;
|
||||
import org.apache.lucene.analysis.charfilter.HTMLStripCharFilter;
|
||||
import org.apache.lucene.analysis.cjk.CJKAnalyzer;
|
||||
import org.apache.lucene.analysis.cn.ChineseAnalyzer;
|
||||
import org.apache.lucene.analysis.cz.CzechAnalyzer;
|
||||
|
@ -43,6 +44,7 @@ import org.apache.lucene.analysis.fi.FinnishAnalyzer;
|
|||
import org.apache.lucene.analysis.fr.ElisionFilter;
|
||||
import org.apache.lucene.analysis.fr.FrenchAnalyzer;
|
||||
import org.apache.lucene.analysis.fr.FrenchStemFilter;
|
||||
import org.apache.lucene.analysis.ga.IrishAnalyzer;
|
||||
import org.apache.lucene.analysis.gl.GalicianAnalyzer;
|
||||
import org.apache.lucene.analysis.hi.HindiAnalyzer;
|
||||
import org.apache.lucene.analysis.hu.HungarianAnalyzer;
|
||||
|
@ -76,7 +78,6 @@ import org.elasticsearch.ElasticSearchIllegalStateException;
|
|||
import org.elasticsearch.common.component.AbstractComponent;
|
||||
import org.elasticsearch.common.inject.Inject;
|
||||
import org.elasticsearch.common.lucene.Lucene;
|
||||
import org.elasticsearch.common.lucene.analysis.HTMLStripCharFilter;
|
||||
import org.elasticsearch.common.regex.Regex;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.common.util.concurrent.ConcurrentCollections;
|
||||
|
@ -141,6 +142,7 @@ public class IndicesAnalysisService extends AbstractComponent {
|
|||
analyzerProviderFactories.put("hindi", new PreBuiltAnalyzerProviderFactory("hindi", AnalyzerScope.INDICES, new HindiAnalyzer(Lucene.ANALYZER_VERSION)));
|
||||
analyzerProviderFactories.put("hungarian", new PreBuiltAnalyzerProviderFactory("hungarian", AnalyzerScope.INDICES, new HungarianAnalyzer(Lucene.ANALYZER_VERSION)));
|
||||
analyzerProviderFactories.put("indonesian", new PreBuiltAnalyzerProviderFactory("indonesian", AnalyzerScope.INDICES, new IndonesianAnalyzer(Lucene.ANALYZER_VERSION)));
|
||||
analyzerProviderFactories.put("irish", new PreBuiltAnalyzerProviderFactory("irish", AnalyzerScope.INDICES, new IrishAnalyzer(Lucene.ANALYZER_VERSION)));
|
||||
analyzerProviderFactories.put("italian", new PreBuiltAnalyzerProviderFactory("italian", AnalyzerScope.INDICES, new ItalianAnalyzer(Lucene.ANALYZER_VERSION)));
|
||||
analyzerProviderFactories.put("latvian", new PreBuiltAnalyzerProviderFactory("latvian", AnalyzerScope.INDICES, new LatvianAnalyzer(Lucene.ANALYZER_VERSION)));
|
||||
analyzerProviderFactories.put("norwegian", new PreBuiltAnalyzerProviderFactory("norwegian", AnalyzerScope.INDICES, new NorwegianAnalyzer(Lucene.ANALYZER_VERSION)));
|
||||
|
|
|
@ -267,7 +267,6 @@ public class HighlighterSearchTests extends AbstractNodesTests {
|
|||
assertThat("Failures " + Arrays.toString(searchResponse.shardFailures()), searchResponse.shardFailures().length, equalTo(0));
|
||||
assertThat(searchResponse.hits().totalHits(), equalTo(1l));
|
||||
|
||||
// LUCENE 3.1 UPGRADE: Caused adding the space at the end...
|
||||
assertThat(searchResponse.hits().getAt(0).highlightFields().get("field1").fragments()[0], equalTo("this is a <xxx>test</xxx>"));
|
||||
|
||||
logger.info("--> searching on _all, highlighting on field1");
|
||||
|
@ -342,7 +341,7 @@ public class HighlighterSearchTests extends AbstractNodesTests {
|
|||
assertThat(searchResponse.hits().hits().length, equalTo(COUNT));
|
||||
for (SearchHit hit : searchResponse.hits()) {
|
||||
// LUCENE 3.1 UPGRADE: Caused adding the space at the end...
|
||||
assertThat(hit.highlightFields().get("field1").fragments()[0], equalTo("<em>test</em> " + hit.id() + " "));
|
||||
assertThat(hit.highlightFields().get("field1").fragments()[0], equalTo("<em>test</em> " + hit.id()));
|
||||
}
|
||||
|
||||
logger.info("--> searching explicitly on field1 and highlighting on it, with DFS");
|
||||
|
@ -355,8 +354,7 @@ public class HighlighterSearchTests extends AbstractNodesTests {
|
|||
assertThat(searchResponse.hits().totalHits(), equalTo((long) COUNT));
|
||||
assertThat(searchResponse.hits().hits().length, equalTo(COUNT));
|
||||
for (SearchHit hit : searchResponse.hits()) {
|
||||
// LUCENE 3.1 UPGRADE: Caused adding the space at the end...
|
||||
assertThat(hit.highlightFields().get("field1").fragments()[0], equalTo("<em>test</em> " + hit.id() + " "));
|
||||
assertThat(hit.highlightFields().get("field1").fragments()[0], equalTo("<em>test</em> " + hit.id()));
|
||||
}
|
||||
|
||||
logger.info("--> searching explicitly _all and highlighting on _all");
|
||||
|
@ -368,7 +366,6 @@ public class HighlighterSearchTests extends AbstractNodesTests {
|
|||
assertThat(searchResponse.hits().totalHits(), equalTo((long) COUNT));
|
||||
assertThat(searchResponse.hits().hits().length, equalTo(COUNT));
|
||||
for (SearchHit hit : searchResponse.hits()) {
|
||||
// LUCENE 3.1 UPGRADE: Caused adding the space at the end...
|
||||
assertThat(hit.highlightFields().get("_all").fragments()[0], equalTo("<em>test</em> " + hit.id() + " "));
|
||||
}
|
||||
}
|
||||
|
@ -523,7 +520,6 @@ public class HighlighterSearchTests extends AbstractNodesTests {
|
|||
assertThat(search.hits().hits().length, equalTo(5));
|
||||
|
||||
for (SearchHit hit : search.hits()) {
|
||||
// LUCENE 3.1 UPGRADE: Caused adding the space at the end...
|
||||
assertThat(hit.highlightFields().get("title").fragments()[0], equalTo("highlighting <em>test</em> for *&? elasticsearch"));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -60,7 +60,7 @@ public class VectorHighlighterTests {
|
|||
String fragment = highlighter.getBestFragment(highlighter.getFieldQuery(new TermQuery(new Term("content", "bad"))),
|
||||
reader, topDocs.scoreDocs[0].doc, "content", 30);
|
||||
assertThat(fragment, notNullValue());
|
||||
assertThat(fragment, equalTo("e big <b>bad</b> dog "));
|
||||
assertThat(fragment, equalTo("the big <b>bad</b> dog"));
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
Loading…
Reference in New Issue