LUCENE-5803: Add DelegatingAnalyzerWrapper, an optimized variant of AnalyzerWrapper that doesn't allow to wrap components or readers

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1607998 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Uwe Schindler 2014-07-05 11:32:31 +00:00
parent 271576ed0f
commit 4cdfa19970
9 changed files with 181 additions and 36 deletions

View File

@ -125,6 +125,12 @@ Optimizations
* LUCENE-5797: Optimize norms merging (Adrien Grand, Robert Muir)
* LUCENE-5803: Add DelegatingAnalyzerWrapper, an optimized variant
of AnalyzerWrapper that doesn't allow to wrap components or readers.
This wrapper class is the base class of all analyzers that just delegate
to another analyzer, e.g. per field name: PerFieldAnalyzerWrapper and
Solr's schema support. (Shay Banon, Uwe Schindler, Robert Muir)
Test Framework
* LUCENE-5786: Unflushed/ truncated events file (hung testing subprocess).

View File

@ -18,7 +18,7 @@ package org.apache.lucene.analysis.miscellaneous;
*/
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.AnalyzerWrapper;
import org.apache.lucene.analysis.DelegatingAnalyzerWrapper;
import java.util.Collections;
import java.util.Map;
@ -48,7 +48,7 @@ import java.util.Map;
* <p>A PerFieldAnalyzerWrapper can be used like any other analyzer, for both indexing
* and query parsing.
*/
public final class PerFieldAnalyzerWrapper extends AnalyzerWrapper {
public final class PerFieldAnalyzerWrapper extends DelegatingAnalyzerWrapper {
private final Analyzer defaultAnalyzer;
private final Map<String, Analyzer> fieldAnalyzers;

View File

@ -1,14 +1,21 @@
package org.apache.lucene.analysis.miscellaneous;
import java.io.IOException;
import java.io.Reader;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.AnalyzerWrapper;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockCharFilter;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.SimpleAnalyzer;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.Rethrow;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
@ -31,8 +38,8 @@ public class TestPerFieldAnalyzerWrapper extends BaseTokenStreamTestCase {
public void testPerField() throws Exception {
String text = "Qwerty";
Map<String, Analyzer> analyzerPerField = new HashMap<>();
analyzerPerField.put("special", new SimpleAnalyzer(TEST_VERSION_CURRENT));
Map<String,Analyzer> analyzerPerField =
Collections.<String,Analyzer>singletonMap("special", new SimpleAnalyzer(TEST_VERSION_CURRENT));
PerFieldAnalyzerWrapper analyzer =
new PerFieldAnalyzerWrapper(new WhitespaceAnalyzer(TEST_VERSION_CURRENT), analyzerPerField);
@ -62,6 +69,63 @@ public class TestPerFieldAnalyzerWrapper extends BaseTokenStreamTestCase {
}
}
public void testReuseWrapped() throws Exception {
final String text = "Qwerty";
final Analyzer specialAnalyzer = new SimpleAnalyzer(TEST_VERSION_CURRENT);
final Analyzer defaultAnalyzer = new WhitespaceAnalyzer(TEST_VERSION_CURRENT);
TokenStream ts1, ts2, ts3, ts4;
final PerFieldAnalyzerWrapper wrapper1 = new PerFieldAnalyzerWrapper(defaultAnalyzer,
Collections.<String,Analyzer>singletonMap("special", specialAnalyzer));
// test that the PerFieldWrapper returns the same instance as original Analyzer:
ts1 = defaultAnalyzer.tokenStream("something", text);
ts2 = wrapper1.tokenStream("something", text);
assertSame(ts1, ts2);
ts1 = specialAnalyzer.tokenStream("special", text);
ts2 = wrapper1.tokenStream("special", text);
assertSame(ts1, ts2);
// Wrap with another wrapper, which does *not* extend DelegatingAnalyzerWrapper:
final AnalyzerWrapper wrapper2 = new AnalyzerWrapper(wrapper1.getReuseStrategy()) {
@Override
protected Analyzer getWrappedAnalyzer(String fieldName) {
return wrapper1;
}
@Override
protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
try {
assertNotSame(specialAnalyzer.tokenStream("special", text), components.getTokenStream());
} catch (IOException e) {
Rethrow.rethrow(e);
}
TokenFilter filter = new ASCIIFoldingFilter(components.getTokenStream());
return new TokenStreamComponents(components.getTokenizer(), filter);
}
};
ts3 = wrapper2.tokenStream("special", text);
assertNotSame(ts1, ts3);
assertTrue(ts3 instanceof ASCIIFoldingFilter);
// check that cache did not get corrumpted:
ts2 = wrapper1.tokenStream("special", text);
assertSame(ts1, ts2);
// Wrap PerField with another PerField. In that case all TokenStreams returned must be the same:
final PerFieldAnalyzerWrapper wrapper3 = new PerFieldAnalyzerWrapper(wrapper1,
Collections.<String,Analyzer>singletonMap("moreSpecial", specialAnalyzer));
ts1 = specialAnalyzer.tokenStream("special", text);
ts2 = wrapper3.tokenStream("special", text);
assertSame(ts1, ts2);
ts3 = specialAnalyzer.tokenStream("moreSpecial", text);
ts4 = wrapper3.tokenStream("moreSpecial", text);
assertSame(ts3, ts4);
assertSame(ts2, ts3);
}
public void testCharFilters() throws Exception {
Analyzer a = new Analyzer() {
@Override

View File

@ -32,18 +32,6 @@ import java.io.Reader;
*/
public abstract class AnalyzerWrapper extends Analyzer {
/**
* Creates a new AnalyzerWrapper. Since the {@link Analyzer.ReuseStrategy} of
* the wrapped Analyzers are unknown, {@link #PER_FIELD_REUSE_STRATEGY} is assumed.
* @deprecated Use {@link #AnalyzerWrapper(Analyzer.ReuseStrategy)}
* and specify a valid {@link Analyzer.ReuseStrategy}, probably retrieved from the
* wrapped analyzer using {@link #getReuseStrategy()}.
*/
@Deprecated
protected AnalyzerWrapper() {
this(PER_FIELD_REUSE_STRATEGY);
}
/**
* Creates a new AnalyzerWrapper with the given reuse strategy.
* <p>If you want to wrap a single delegate Analyzer you can probably

View File

@ -0,0 +1,92 @@
package org.apache.lucene.analysis;
import java.io.Reader;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* An analyzer wrapper, that doesn't allow to wrap components or readers.
* By disallowing it, it means that the thread local resources can be delegated
* to the delegate analyzer, and not also be allocated on this analyzer.
* This wrapper class is the base class of all analyzers that just delegate to
* another analyzer, e.g. per field name.
*
* <p>This solves the problem of per field analyzer wrapper, where it also
* maintains a thread local per field token stream components, while it can
* safely delegate those and not also hold these data structures, which can
* become expensive memory wise.
*
* <p><b>Please note:</b> This analyzer uses a private {@link Analyzer.ReuseStrategy},
* which is returned by {@link #getReuseStrategy()}. This strategy is used when
* delegating. If you wrap this analyzer again and reuse this strategy, no
* delegation is done and the given fallback is used.
*/
public abstract class DelegatingAnalyzerWrapper extends AnalyzerWrapper {
/**
* Constructor.
* @param fallbackStrategy is the strategy to use if delegation is not possible
* This is to support the common pattern:
* {@code new OtherWrapper(thisWrapper.getReuseStrategy())}
*/
protected DelegatingAnalyzerWrapper(ReuseStrategy fallbackStrategy) {
super(new DelegatingReuseStrategy(fallbackStrategy));
// häckidy-hick-hack, because we cannot call super() with a reference to "this":
((DelegatingReuseStrategy) getReuseStrategy()).wrapper = this;
}
@Override
protected final TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
return super.wrapComponents(fieldName, components);
}
@Override
protected final Reader wrapReader(String fieldName, Reader reader) {
return super.wrapReader(fieldName, reader);
}
private static final class DelegatingReuseStrategy extends ReuseStrategy {
DelegatingAnalyzerWrapper wrapper;
private final ReuseStrategy fallbackStrategy;
DelegatingReuseStrategy(ReuseStrategy fallbackStrategy) {
this.fallbackStrategy = fallbackStrategy;
}
@Override
public TokenStreamComponents getReusableComponents(Analyzer analyzer, String fieldName) {
if (analyzer == wrapper) {
final Analyzer wrappedAnalyzer = wrapper.getWrappedAnalyzer(fieldName);
return wrappedAnalyzer.getReuseStrategy().getReusableComponents(wrappedAnalyzer, fieldName);
} else {
return fallbackStrategy.getReusableComponents(analyzer, fieldName);
}
}
@Override
public void setReusableComponents(Analyzer analyzer, String fieldName, TokenStreamComponents components) {
if (analyzer == wrapper) {
final Analyzer wrappedAnalyzer = wrapper.getWrappedAnalyzer(fieldName);
wrappedAnalyzer.getReuseStrategy().setReusableComponents(wrappedAnalyzer, fieldName, components);
} else {
fallbackStrategy.setReusableComponents(analyzer, fieldName, components);
}
}
};
}

View File

@ -273,11 +273,6 @@ public class TestMockAnalyzer extends BaseTokenStreamTestCase {
return new MockCharFilter(reader, 7);
}
@Override
protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
return components;
}
@Override
protected Analyzer getWrappedAnalyzer(String fieldName) {
return delegate;
@ -292,7 +287,7 @@ public class TestMockAnalyzer extends BaseTokenStreamTestCase {
final int positionGap = random().nextInt(1000);
final int offsetGap = random().nextInt(1000);
final Analyzer delegate = new MockAnalyzer(random());
final Analyzer a = new AnalyzerWrapper(delegate.getReuseStrategy()) {
final Analyzer a = new DelegatingAnalyzerWrapper(delegate.getReuseStrategy()) {
@Override
protected Analyzer getWrappedAnalyzer(String fieldName) {
return delegate;

View File

@ -22,8 +22,8 @@ import java.util.Set;
import java.util.TreeMap;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.AnalyzerWrapper;
import org.apache.lucene.analysis.CannedTokenStream;
import org.apache.lucene.analysis.DelegatingAnalyzerWrapper;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenFilter;
import org.apache.lucene.analysis.MockTokenizer;
@ -606,7 +606,7 @@ public class FastVectorHighlighterTest extends LuceneTestCase {
fieldAnalyzers.put( "field_tripples", new MockAnalyzer( random(), new CharacterRunAutomaton( new RegExp("...").toAutomaton() ), true ) );
fieldAnalyzers.put( "field_sliced", fieldAnalyzers.get( "field" ) );
fieldAnalyzers.put( "field_der_red", fieldAnalyzers.get( "field" ) ); // This is required even though we provide a token stream
Analyzer analyzer = new AnalyzerWrapper() {
Analyzer analyzer = new DelegatingAnalyzerWrapper(Analyzer.PER_FIELD_REUSE_STRATEGY) {
public Analyzer getWrappedAnalyzer(String fieldName) {
return fieldAnalyzers.get( fieldName );
}

View File

@ -18,8 +18,7 @@
package org.apache.solr.schema;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.AnalyzerWrapper;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.analysis.DelegatingAnalyzerWrapper;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexableField;
@ -397,7 +396,7 @@ public class IndexSchema {
return false;
}
private class SolrIndexAnalyzer extends AnalyzerWrapper {
private class SolrIndexAnalyzer extends DelegatingAnalyzerWrapper {
protected final HashMap<String, Analyzer> analyzers;
SolrIndexAnalyzer() {

View File

@ -61,6 +61,7 @@ public class PreAnalyzedField extends FieldType {
private PreAnalyzedParser parser;
private Analyzer analyzer;
@Override
public void init(IndexSchema schema, Map<String, String> args) {
@ -87,19 +88,19 @@ public class PreAnalyzedField extends FieldType {
}
args.remove(PARSER_IMPL);
}
}
@Override
public Analyzer getIndexAnalyzer() {
return new SolrAnalyzer() {
// create Analyzer instance for reuse:
analyzer = new SolrAnalyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
return new TokenStreamComponents(new PreAnalyzedTokenizer(parser));
}
};
}
@Override
public Analyzer getIndexAnalyzer() {
return analyzer;
}
@Override
public Analyzer getQueryAnalyzer() {