LUCENE-5170: Add getter for reuse strategy to Analyzer, make AnalyzerWrapper's reuse strategy configurable, fix strategy to be stateless

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1513903 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Uwe Schindler 2013-08-14 14:41:36 +00:00
parent 97b418391f
commit 8bac549ce3
18 changed files with 139 additions and 81 deletions

View File

@ -155,6 +155,16 @@ API Changes
files. FSDirectory#setReadChunkSize() is now deprecated and will be removed
in Lucene 5.0. (Uwe Schindler, Robert Muir, gsingers)
* LUCENE-5170: Analyzer.ReuseStrategy instances are now stateless and can
be shared across Analyzer instances, which was not possible before.
Lucene now ships with stateless singletons for per-field and global reuse.
Legacy code can still instantiate the deprecated implementation classes,
but new code should use the constants. Implementors of custom strategies
must adapt to the new method signatures. AnalyzerWrapper can now be
configured with a custom strategy, too, ideally the one obtained from the
wrapped Analyzer; Analyzer now provides a getter to retrieve its strategy
for this use case. (Uwe Schindler, Robert Muir, Shay Banon)
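For reference, a minimal sketch of how the new constant and getter might be used from
application code (the ReuseStrategyUsage class name and the WhitespaceTokenizer choice
from analyzers-common are illustrative assumptions, not part of this patch):

import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.util.Version;

public class ReuseStrategyUsage {
  static Analyzer perFieldAnalyzer() {
    // Pass the stateless singleton instead of instantiating PerFieldReuseStrategy.
    return new Analyzer(Analyzer.PER_FIELD_REUSE_STRATEGY) {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer source = new WhitespaceTokenizer(Version.LUCENE_44, reader);
        return new TokenStreamComponents(source);
      }
    };
  }

  public static void main(String[] args) {
    Analyzer a = perFieldAnalyzer();
    // The new getter exposes the strategy, e.g. so a wrapper can share it.
    System.out.println(a.getReuseStrategy() == Analyzer.PER_FIELD_REUSE_STRATEGY); // prints true
  }
}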
Optimizations
* LUCENE-5088: Added TermFilter to filter docs by a specific term.
@ -179,6 +189,9 @@ Optimizations
* LUCENE-5159: Prefix-code the sorted/sortedset value dictionaries in DiskDV.
(Robert Muir)
* LUCENE-5170: Fixed several wrapper analyzers to inherit the reuse strategy
of the wrapped Analyzer. (Uwe Schindler, Robert Muir, Shay Banon)
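The fix can be observed directly on one of these wrappers; a small sketch
(WrapperStrategyCheck is a hypothetical helper class, LimitTokenCountAnalyzer and
KeywordAnalyzer are existing classes):

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.analysis.miscellaneous.LimitTokenCountAnalyzer;

public class WrapperStrategyCheck {
  public static void main(String[] args) {
    Analyzer delegate = new KeywordAnalyzer();
    Analyzer wrapper = new LimitTokenCountAnalyzer(delegate, 10, false);
    // After this fix the wrapper shares the delegate's (stateless) reuse strategy:
    System.out.println(wrapper.getReuseStrategy() == delegate.getReuseStrategy()); // prints true
  }
}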
Documentation
* LUCENE-4894: remove facet userguide as it was outdated. Partially absorbed into
@ -194,6 +207,10 @@ Changes in backwards compatibility policy
no longer support multiple "dictionaries" as there is only one dictionary available.
(Dawid Weiss)
* LUCENE-5170: Changed the method signatures of Analyzer.ReuseStrategy to take
the Analyzer as an additional parameter. The Closeable interface was removed
because the class is now stateless. (Uwe Schindler, Robert Muir, Shay Banon)
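Custom strategies are now written against the Analyzer-aware signatures; a minimal
sketch under that assumption (NoReuseStrategy is a hypothetical example, not shipped
with Lucene):

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Analyzer.ReuseStrategy;
import org.apache.lucene.analysis.Analyzer.TokenStreamComponents;

// A stateless strategy that disables reuse entirely. Note the Analyzer
// parameter on both methods and the absence of close()/Closeable.
public final class NoReuseStrategy extends ReuseStrategy {
  @Override
  public TokenStreamComponents getReusableComponents(Analyzer analyzer, String fieldName) {
    return null; // always forces createComponents()
  }
  @Override
  public void setReusableComponents(Analyzer analyzer, String fieldName, TokenStreamComponents components) {
    // nothing is stored; a caching strategy would call setStoredValue(analyzer, ...)
  }
}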
======================= Lucene 4.4.0 =======================
Changes in backwards compatibility policy

View File

@ -46,6 +46,7 @@ public final class LimitTokenCountAnalyzer extends AnalyzerWrapper {
* @param consumeAllTokens whether all tokens from the delegate should be consumed even if maxTokenCount is reached.
*/
public LimitTokenCountAnalyzer(Analyzer delegate, int maxTokenCount, boolean consumeAllTokens) {
super(delegate.getReuseStrategy());
this.delegate = delegate;
this.maxTokenCount = maxTokenCount;
this.consumeAllTokens = consumeAllTokens;

View File

@ -73,6 +73,7 @@ public final class PerFieldAnalyzerWrapper extends AnalyzerWrapper {
*/
public PerFieldAnalyzerWrapper(Analyzer defaultAnalyzer,
Map<String, Analyzer> fieldAnalyzers) {
super(PER_FIELD_REUSE_STRATEGY);
this.defaultAnalyzer = defaultAnalyzer;
this.fieldAnalyzers = (fieldAnalyzers != null) ? fieldAnalyzers : Collections.<String, Analyzer>emptyMap();
}

View File

@ -148,6 +148,7 @@ public final class QueryAutoStopWordAnalyzer extends AnalyzerWrapper {
IndexReader indexReader,
Collection<String> fields,
int maxDocFreq) throws IOException {
super(delegate.getReuseStrategy());
this.matchVersion = matchVersion;
this.delegate = delegate;

View File

@ -30,7 +30,7 @@ import org.apache.lucene.util.Version;
*/
public final class ShingleAnalyzerWrapper extends AnalyzerWrapper {
private final Analyzer defaultAnalyzer;
private final Analyzer delegate;
private final int maxShingleSize;
private final int minShingleSize;
private final String tokenSeparator;
@ -52,7 +52,7 @@ public final class ShingleAnalyzerWrapper extends AnalyzerWrapper {
/**
* Creates a new ShingleAnalyzerWrapper
*
* @param defaultAnalyzer Analyzer whose TokenStream is to be filtered
* @param delegate Analyzer whose TokenStream is to be filtered
* @param minShingleSize Min shingle (token ngram) size
* @param maxShingleSize Max shingle size
* @param tokenSeparator Used to separate input stream tokens in output shingles
@ -65,13 +65,14 @@ public final class ShingleAnalyzerWrapper extends AnalyzerWrapper {
* regardless of whether any shingles are available.
*/
public ShingleAnalyzerWrapper(
Analyzer defaultAnalyzer,
Analyzer delegate,
int minShingleSize,
int maxShingleSize,
String tokenSeparator,
boolean outputUnigrams,
boolean outputUnigramsIfNoShingles) {
this.defaultAnalyzer = defaultAnalyzer;
super(delegate.getReuseStrategy());
this.delegate = delegate;
if (maxShingleSize < 2) {
throw new IllegalArgumentException("Max shingle size must be >= 2");
@ -138,7 +139,7 @@ public final class ShingleAnalyzerWrapper extends AnalyzerWrapper {
@Override
protected Analyzer getWrappedAnalyzer(String fieldName) {
return defaultAnalyzer;
return delegate;
}
@Override

View File

@ -72,12 +72,15 @@ public abstract class Analyzer implements Closeable {
private final ReuseStrategy reuseStrategy;
// non final as it gets nulled if closed; pkg private for access by ReuseStrategy's final helper methods:
CloseableThreadLocal<Object> storedValue = new CloseableThreadLocal<Object>();
/**
* Create a new Analyzer, reusing the same set of components per-thread
* across calls to {@link #tokenStream(String, Reader)}.
*/
public Analyzer() {
this(new GlobalReuseStrategy());
this(GLOBAL_REUSE_STRATEGY);
}
/**
@ -133,11 +136,11 @@ public abstract class Analyzer implements Closeable {
*/
public final TokenStream tokenStream(final String fieldName,
final Reader reader) throws IOException {
TokenStreamComponents components = reuseStrategy.getReusableComponents(fieldName);
TokenStreamComponents components = reuseStrategy.getReusableComponents(this, fieldName);
final Reader r = initReader(fieldName, reader);
if (components == null) {
components = createComponents(fieldName, r);
reuseStrategy.setReusableComponents(fieldName, components);
reuseStrategy.setReusableComponents(this, fieldName, components);
} else {
components.setReader(r);
}
@ -167,7 +170,7 @@ public abstract class Analyzer implements Closeable {
* @see #tokenStream(String, Reader)
*/
public final TokenStream tokenStream(final String fieldName, final String text) throws IOException {
TokenStreamComponents components = reuseStrategy.getReusableComponents(fieldName);
TokenStreamComponents components = reuseStrategy.getReusableComponents(this, fieldName);
@SuppressWarnings("resource") final ReusableStringReader strReader =
(components == null || components.reusableStringReader == null) ?
new ReusableStringReader() : components.reusableStringReader;
@ -175,7 +178,7 @@ public abstract class Analyzer implements Closeable {
final Reader r = initReader(fieldName, strReader);
if (components == null) {
components = createComponents(fieldName, r);
reuseStrategy.setReusableComponents(fieldName, components);
reuseStrategy.setReusableComponents(this, fieldName, components);
} else {
components.setReader(r);
}
@ -229,10 +232,20 @@ public abstract class Analyzer implements Closeable {
return 1;
}
/**
* Returns the {@link ReuseStrategy} used by this Analyzer.
*/
public final ReuseStrategy getReuseStrategy() {
return reuseStrategy;
}
/** Frees persistent resources used by this Analyzer */
@Override
public void close() {
reuseStrategy.close();
if (storedValue != null) {
storedValue.close();
storedValue = null;
}
}
/**
@ -317,123 +330,126 @@ public abstract class Analyzer implements Closeable {
* Strategy defining how TokenStreamComponents are reused per call to
* {@link Analyzer#tokenStream(String, java.io.Reader)}.
*/
public static abstract class ReuseStrategy implements Closeable {
private CloseableThreadLocal<Object> storedValue = new CloseableThreadLocal<Object>();
public static abstract class ReuseStrategy {
/** Sole constructor. (For invocation by subclass constructors, typically implicit.) */
public ReuseStrategy() {}
/**
* Gets the reusable TokenStreamComponents for the field with the given name
* Gets the reusable TokenStreamComponents for the field with the given name.
*
* @param analyzer Analyzer from which to get the reused components. Use
* {@link #getStoredValue(Analyzer)} and {@link #setStoredValue(Analyzer, Object)}
* to access the data on the Analyzer.
* @param fieldName Name of the field whose reusable TokenStreamComponents
* are to be retrieved
* @return Reusable TokenStreamComponents for the field, or {@code null}
* if there were no previous components for the field
*/
public abstract TokenStreamComponents getReusableComponents(String fieldName);
public abstract TokenStreamComponents getReusableComponents(Analyzer analyzer, String fieldName);
/**
* Stores the given TokenStreamComponents as the reusable components for the
* field with the give name
* field with the given name.
*
* @param fieldName Name of the field whose TokenStreamComponents are being set
* @param components TokenStreamComponents which are to be reused for the field
*/
public abstract void setReusableComponents(String fieldName, TokenStreamComponents components);
public abstract void setReusableComponents(Analyzer analyzer, String fieldName, TokenStreamComponents components);
/**
* Returns the currently stored value
* Returns the currently stored value.
*
* @return Currently stored value or {@code null} if no value is stored
* @throws AlreadyClosedException if the ReuseStrategy is closed.
* @throws AlreadyClosedException if the Analyzer is closed.
*/
protected final Object getStoredValue() {
try {
return storedValue.get();
} catch (NullPointerException npe) {
if (storedValue == null) {
throw new AlreadyClosedException("this Analyzer is closed");
} else {
throw npe;
}
protected final Object getStoredValue(Analyzer analyzer) {
if (analyzer.storedValue == null) {
throw new AlreadyClosedException("this Analyzer is closed");
}
return analyzer.storedValue.get();
}
/**
* Sets the stored value
* Sets the stored value.
*
* @param storedValue Value to store
* @throws AlreadyClosedException if the ReuseStrategy is closed.
* @throws AlreadyClosedException if the Analyzer is closed.
*/
protected final void setStoredValue(Object storedValue) {
try {
this.storedValue.set(storedValue);
} catch (NullPointerException npe) {
if (storedValue == null) {
throw new AlreadyClosedException("this Analyzer is closed");
} else {
throw npe;
}
protected final void setStoredValue(Analyzer analyzer, Object storedValue) {
if (analyzer.storedValue == null) {
throw new AlreadyClosedException("this Analyzer is closed");
}
analyzer.storedValue.set(storedValue);
}
/**
* Closes the ReuseStrategy, freeing any resources
*/
@Override
public void close() {
if (storedValue != null) {
storedValue.close();
storedValue = null;
}
}
}
/**
* A predefined {@link ReuseStrategy} that reuses the same components for
* every field.
*/
public static final ReuseStrategy GLOBAL_REUSE_STRATEGY = new GlobalReuseStrategy();
/**
* Implementation of {@link ReuseStrategy} that reuses the same components for
* every field.
* @deprecated This implementation class will be hidden in Lucene 5.0.
* Use {@link Analyzer#GLOBAL_REUSE_STRATEGY} instead!
*/
@Deprecated
public final static class GlobalReuseStrategy extends ReuseStrategy {
/** Creates a new instance, with empty per-thread values */
/** Sole constructor. (For invocation by subclass constructors, typically implicit.)
* @deprecated Don't create instances of this class, use {@link Analyzer#GLOBAL_REUSE_STRATEGY} */
@Deprecated
public GlobalReuseStrategy() {}
@Override
public TokenStreamComponents getReusableComponents(String fieldName) {
return (TokenStreamComponents) getStoredValue();
public TokenStreamComponents getReusableComponents(Analyzer analyzer, String fieldName) {
return (TokenStreamComponents) getStoredValue(analyzer);
}
@Override
public void setReusableComponents(String fieldName, TokenStreamComponents components) {
setStoredValue(components);
public void setReusableComponents(Analyzer analyzer, String fieldName, TokenStreamComponents components) {
setStoredValue(analyzer, components);
}
}
/**
* Implementation of {@link ReuseStrategy} that reuses components per-field by
* A predefined {@link ReuseStrategy} that reuses components per-field by
* maintaining a Map of TokenStreamComponent per field name.
*/
public static final ReuseStrategy PER_FIELD_REUSE_STRATEGY = new PerFieldReuseStrategy();
/**
* Implementation of {@link ReuseStrategy} that reuses components per-field by
* maintaining a Map of TokenStreamComponent per field name.
* @deprecated This implementation class will be hidden in Lucene 5.0.
* Use {@link Analyzer#PER_FIELD_REUSE_STRATEGY} instead!
*/
@Deprecated
public static class PerFieldReuseStrategy extends ReuseStrategy {
/** Creates a new instance, with empty per-thread-per-field values */
/** Sole constructor. (For invocation by subclass constructors, typically implicit.)
* @deprecated Don't create instances of this class, use {@link Analyzer#PER_FIELD_REUSE_STRATEGY} */
@Deprecated
public PerFieldReuseStrategy() {}
@SuppressWarnings("unchecked")
@Override
public TokenStreamComponents getReusableComponents(String fieldName) {
Map<String, TokenStreamComponents> componentsPerField = (Map<String, TokenStreamComponents>) getStoredValue();
public TokenStreamComponents getReusableComponents(Analyzer analyzer, String fieldName) {
Map<String, TokenStreamComponents> componentsPerField = (Map<String, TokenStreamComponents>) getStoredValue(analyzer);
return componentsPerField != null ? componentsPerField.get(fieldName) : null;
}
@SuppressWarnings("unchecked")
@Override
public void setReusableComponents(String fieldName, TokenStreamComponents components) {
Map<String, TokenStreamComponents> componentsPerField = (Map<String, TokenStreamComponents>) getStoredValue();
public void setReusableComponents(Analyzer analyzer, String fieldName, TokenStreamComponents components) {
Map<String, TokenStreamComponents> componentsPerField = (Map<String, TokenStreamComponents>) getStoredValue(analyzer);
if (componentsPerField == null) {
componentsPerField = new HashMap<String, TokenStreamComponents>();
setStoredValue(componentsPerField);
setStoredValue(analyzer, componentsPerField);
}
componentsPerField.put(fieldName, components);
}

View File

@ -34,10 +34,27 @@ public abstract class AnalyzerWrapper extends Analyzer {
/**
* Creates a new AnalyzerWrapper. Since the {@link Analyzer.ReuseStrategy} of
* the wrapped Analyzers are unknown, {@link Analyzer.PerFieldReuseStrategy} is assumed
* the wrapped Analyzers is unknown, {@link #PER_FIELD_REUSE_STRATEGY} is assumed.
* @deprecated Use {@link #AnalyzerWrapper(Analyzer.ReuseStrategy)}
* and specify a valid {@link Analyzer.ReuseStrategy}, probably retrieved from the
* wrapped analyzer using {@link #getReuseStrategy()}.
*/
@Deprecated
protected AnalyzerWrapper() {
super(new PerFieldReuseStrategy());
this(PER_FIELD_REUSE_STRATEGY);
}
/**
* Creates a new AnalyzerWrapper with the given reuse strategy.
* <p>If you want to wrap a single delegate Analyzer you can probably
* reuse its strategy when instantiating this subclass:
* {@code super(delegate.getReuseStrategy());}.
* <p>If you choose different analyzers per field, use
* {@link #PER_FIELD_REUSE_STRATEGY}.
* @see #getReuseStrategy()
*/
protected AnalyzerWrapper(ReuseStrategy reuseStrategy) {
super(reuseStrategy);
}
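For instance, a single-delegate wrapper built on this constructor could look like the
following sketch (DelegatingWrapper is a hypothetical name; the pattern mirrors the
wrappers fixed in this commit):

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.AnalyzerWrapper;

public final class DelegatingWrapper extends AnalyzerWrapper {
  private final Analyzer delegate;

  public DelegatingWrapper(Analyzer delegate) {
    super(delegate.getReuseStrategy()); // inherit the delegate's stateless strategy
    this.delegate = delegate;
  }

  @Override
  protected Analyzer getWrappedAnalyzer(String fieldName) {
    return delegate;
  }
}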
/**

View File

@ -135,7 +135,8 @@ public class TestMockAnalyzer extends BaseTokenStreamTestCase {
// LUCENE-5153: test that wrapping an analyzer's reader is allowed
final Random random = random();
Analyzer a = new AnalyzerWrapper() {
final Analyzer delegate = new MockAnalyzer(random);
Analyzer a = new AnalyzerWrapper(delegate.getReuseStrategy()) {
@Override
protected Reader wrapReader(String fieldName, Reader reader) {
@ -149,7 +150,7 @@ public class TestMockAnalyzer extends BaseTokenStreamTestCase {
@Override
protected Analyzer getWrappedAnalyzer(String fieldName) {
return new MockAnalyzer(random);
return delegate;
}
};

View File

@ -67,7 +67,7 @@ public class TestBlockPostingsFormat3 extends LuceneTestCase {
// creates 8 fields with different options and does "duels" of fields against each other
public void test() throws Exception {
Directory dir = newDirectory();
Analyzer analyzer = new Analyzer(new Analyzer.PerFieldReuseStrategy()) {
Analyzer analyzer = new Analyzer(Analyzer.PER_FIELD_REUSE_STRATEGY) {
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
Tokenizer tokenizer = new MockTokenizer(reader);

View File

@ -381,7 +381,7 @@ public class TestIndexWriterExceptions extends LuceneTestCase {
doc.add(newTextField("field", "a field", Field.Store.YES));
w.addDocument(doc);
Analyzer analyzer = new Analyzer(new Analyzer.PerFieldReuseStrategy()) {
Analyzer analyzer = new Analyzer(Analyzer.PER_FIELD_REUSE_STRATEGY) {
@Override
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
MockTokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
@ -590,7 +590,7 @@ public class TestIndexWriterExceptions extends LuceneTestCase {
}
public void testDocumentsWriterExceptions() throws IOException {
Analyzer analyzer = new Analyzer(new Analyzer.PerFieldReuseStrategy()) {
Analyzer analyzer = new Analyzer(Analyzer.PER_FIELD_REUSE_STRATEGY) {
@Override
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
MockTokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
@ -685,7 +685,7 @@ public class TestIndexWriterExceptions extends LuceneTestCase {
}
public void testDocumentsWriterExceptionThreads() throws Exception {
Analyzer analyzer = new Analyzer(new Analyzer.PerFieldReuseStrategy()) {
Analyzer analyzer = new Analyzer(Analyzer.PER_FIELD_REUSE_STRATEGY) {
@Override
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
MockTokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);

View File

@ -363,11 +363,11 @@ public class TestPayloads extends LuceneTestCase {
Map<String,PayloadData> fieldToData = new HashMap<String,PayloadData>();
public PayloadAnalyzer() {
super(new PerFieldReuseStrategy());
super(PER_FIELD_REUSE_STRATEGY);
}
public PayloadAnalyzer(String field, byte[] data, int offset, int length) {
super(new PerFieldReuseStrategy());
super(PER_FIELD_REUSE_STRATEGY);
setPayloadData(field, data, offset, length);
}

View File

@ -59,7 +59,7 @@ public class PayloadHelper {
public final class PayloadAnalyzer extends Analyzer {
public PayloadAnalyzer() {
super(new PerFieldReuseStrategy());
super(PER_FIELD_REUSE_STRATEGY);
}
@Override

View File

@ -64,7 +64,7 @@ public class TestPayloadTermQuery extends LuceneTestCase {
private static class PayloadAnalyzer extends Analyzer {
private PayloadAnalyzer() {
super(new PerFieldReuseStrategy());
super(PER_FIELD_REUSE_STRATEGY);
}
@Override

View File

@ -311,7 +311,7 @@ public class TestMultiFieldQueryParser extends LuceneTestCase {
MockAnalyzer stdAnalyzer = new MockAnalyzer(random());
public AnalyzerReturningNull() {
super(new PerFieldReuseStrategy());
super(PER_FIELD_REUSE_STRATEGY);
}
@Override

View File

@ -347,7 +347,7 @@ public class TestMultiFieldQPHelper extends LuceneTestCase {
MockAnalyzer stdAnalyzer = new MockAnalyzer(random());
public AnalyzerReturningNull() {
super(new PerFieldReuseStrategy());
super(PER_FIELD_REUSE_STRATEGY);
}
@Override

View File

@ -99,10 +99,10 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
protected final static String TEXT_FIELD_NAME = "text";
private final Analyzer queryAnalyzer;
private final Analyzer indexAnalyzer;
private final Version matchVersion;
final Analyzer indexAnalyzer;
final Version matchVersion;
private final File indexPath;
private final int minPrefixChars;
final int minPrefixChars;
private Directory dir;
/** {@link IndexSearcher} used for lookups. */
@ -193,7 +193,7 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
AtomicReader r = null;
boolean success = false;
try {
Analyzer gramAnalyzer = new AnalyzerWrapper() {
Analyzer gramAnalyzer = new AnalyzerWrapper(Analyzer.PER_FIELD_REUSE_STRATEGY) {
@Override
protected Analyzer getWrappedAnalyzer(String fieldName) {
return indexAnalyzer;

View File

@ -60,7 +60,7 @@ public final class MockAnalyzer extends Analyzer {
* @param filter DFA describing how terms should be filtered (set of stopwords, etc)
*/
public MockAnalyzer(Random random, CharacterRunAutomaton runAutomaton, boolean lowerCase, CharacterRunAutomaton filter) {
super(new PerFieldReuseStrategy());
super(PER_FIELD_REUSE_STRATEGY);
// TODO: this should be solved in a different way; Random should not be shared (!).
this.random = new Random(random.nextLong());
this.runAutomaton = runAutomaton;

View File

@ -379,6 +379,7 @@ public class IndexSchema {
protected final HashMap<String, Analyzer> analyzers;
SolrIndexAnalyzer() {
super(PER_FIELD_REUSE_STRATEGY);
analyzers = analyzerCache();
}
@ -400,6 +401,8 @@ public class IndexSchema {
}
private class SolrQueryAnalyzer extends SolrIndexAnalyzer {
SolrQueryAnalyzer() {}
@Override
protected HashMap<String, Analyzer> analyzerCache() {
HashMap<String, Analyzer> cache = new HashMap<String, Analyzer>();