mirror of https://github.com/apache/lucene.git
LUCENE-5803: Add DelegatingAnalyzerWrapper, an optimized variant of AnalyzerWrapper that doesn't allow wrapping of components or readers
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1607998 13f79535-47bb-0310-9956-ffa450edef68
parent 271576ed0f
commit 4cdfa19970
lucene/CHANGES.txt

@@ -125,6 +125,12 @@ Optimizations

 * LUCENE-5797: Optimize norms merging (Adrien Grand, Robert Muir)

+* LUCENE-5803: Add DelegatingAnalyzerWrapper, an optimized variant
+  of AnalyzerWrapper that doesn't allow wrapping of components or readers.
+  This wrapper class is the base class of all analyzers that just delegate
+  to another analyzer, e.g. per field name: PerFieldAnalyzerWrapper and
+  Solr's schema support. (Shay Banon, Uwe Schindler, Robert Muir)
+
 Test Framework

 * LUCENE-5786: Unflushed/truncated events file (hung testing subprocess).
org/apache/lucene/analysis/miscellaneous/PerFieldAnalyzerWrapper.java

@@ -18,7 +18,7 @@ package org.apache.lucene.analysis.miscellaneous;
  */

 import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.AnalyzerWrapper;
+import org.apache.lucene.analysis.DelegatingAnalyzerWrapper;

 import java.util.Collections;
 import java.util.Map;
@@ -48,7 +48,7 @@ import java.util.Map;
  * <p>A PerFieldAnalyzerWrapper can be used like any other analyzer, for both indexing
  * and query parsing.
  */
-public final class PerFieldAnalyzerWrapper extends AnalyzerWrapper {
+public final class PerFieldAnalyzerWrapper extends DelegatingAnalyzerWrapper {
   private final Analyzer defaultAnalyzer;
   private final Map<String, Analyzer> fieldAnalyzers;
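For orientation, a minimal usage sketch of the class this hunk changes (not part of the commit; the field name, sample text, and Version constant are illustrative and depend on your Lucene release):

import java.util.Collections;
import java.util.Map;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.SimpleAnalyzer;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper;
import org.apache.lucene.util.Version;

public class PerFieldExample {
  public static void main(String[] args) throws Exception {
    Version matchVersion = Version.LUCENE_4_9; // adjust to your release
    // "special" is analyzed by SimpleAnalyzer; every other field falls
    // back to the WhitespaceAnalyzer passed as the default.
    Map<String,Analyzer> perField = Collections.<String,Analyzer>singletonMap(
        "special", new SimpleAnalyzer(matchVersion));
    Analyzer analyzer = new PerFieldAnalyzerWrapper(
        new WhitespaceAnalyzer(matchVersion), perField);
    try (TokenStream ts = analyzer.tokenStream("special", "Qwerty")) {
      ts.reset();
      // consume the stream as usual: incrementToken(), attributes, end()
    }
  }
}

Because PerFieldAnalyzerWrapper now extends DelegatingAnalyzerWrapper, the tokenStream call above is served by the wrapped analyzer's own cached components instead of a second per-field cache on the wrapper.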
org/apache/lucene/analysis/miscellaneous/TestPerFieldAnalyzerWrapper.java

@@ -1,14 +1,21 @@
 package org.apache.lucene.analysis.miscellaneous;

+import java.io.IOException;
 import java.io.Reader;
-import java.util.HashMap;
+import java.util.Collections;
 import java.util.Map;

-import org.apache.lucene.analysis.*;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.AnalyzerWrapper;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockCharFilter;
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.core.SimpleAnalyzer;
 import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.util.Rethrow;

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -31,8 +38,8 @@ public class TestPerFieldAnalyzerWrapper extends BaseTokenStreamTestCase {
   public void testPerField() throws Exception {
     String text = "Qwerty";

-    Map<String, Analyzer> analyzerPerField = new HashMap<>();
-    analyzerPerField.put("special", new SimpleAnalyzer(TEST_VERSION_CURRENT));
+    Map<String,Analyzer> analyzerPerField =
+      Collections.<String,Analyzer>singletonMap("special", new SimpleAnalyzer(TEST_VERSION_CURRENT));

     PerFieldAnalyzerWrapper analyzer =
       new PerFieldAnalyzerWrapper(new WhitespaceAnalyzer(TEST_VERSION_CURRENT), analyzerPerField);
@@ -62,6 +69,63 @@ public class TestPerFieldAnalyzerWrapper extends BaseTokenStreamTestCase {
     }
   }

+  public void testReuseWrapped() throws Exception {
+    final String text = "Qwerty";
+
+    final Analyzer specialAnalyzer = new SimpleAnalyzer(TEST_VERSION_CURRENT);
+    final Analyzer defaultAnalyzer = new WhitespaceAnalyzer(TEST_VERSION_CURRENT);
+
+    TokenStream ts1, ts2, ts3, ts4;
+
+    final PerFieldAnalyzerWrapper wrapper1 = new PerFieldAnalyzerWrapper(defaultAnalyzer,
+        Collections.<String,Analyzer>singletonMap("special", specialAnalyzer));
+
+    // test that the PerFieldWrapper returns the same instance as the original Analyzer:
+    ts1 = defaultAnalyzer.tokenStream("something", text);
+    ts2 = wrapper1.tokenStream("something", text);
+    assertSame(ts1, ts2);
+
+    ts1 = specialAnalyzer.tokenStream("special", text);
+    ts2 = wrapper1.tokenStream("special", text);
+    assertSame(ts1, ts2);
+
+    // Wrap with another wrapper, which does *not* extend DelegatingAnalyzerWrapper:
+    final AnalyzerWrapper wrapper2 = new AnalyzerWrapper(wrapper1.getReuseStrategy()) {
+      @Override
+      protected Analyzer getWrappedAnalyzer(String fieldName) {
+        return wrapper1;
+      }
+
+      @Override
+      protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
+        try {
+          assertNotSame(specialAnalyzer.tokenStream("special", text), components.getTokenStream());
+        } catch (IOException e) {
+          Rethrow.rethrow(e);
+        }
+        TokenFilter filter = new ASCIIFoldingFilter(components.getTokenStream());
+        return new TokenStreamComponents(components.getTokenizer(), filter);
+      }
+    };
+    ts3 = wrapper2.tokenStream("special", text);
+    assertNotSame(ts1, ts3);
+    assertTrue(ts3 instanceof ASCIIFoldingFilter);
+    // check that the cache did not get corrupted:
+    ts2 = wrapper1.tokenStream("special", text);
+    assertSame(ts1, ts2);
+
+    // Wrap PerField with another PerField. In that case all TokenStreams returned must be the same:
+    final PerFieldAnalyzerWrapper wrapper3 = new PerFieldAnalyzerWrapper(wrapper1,
+        Collections.<String,Analyzer>singletonMap("moreSpecial", specialAnalyzer));
+    ts1 = specialAnalyzer.tokenStream("special", text);
+    ts2 = wrapper3.tokenStream("special", text);
+    assertSame(ts1, ts2);
+    ts3 = specialAnalyzer.tokenStream("moreSpecial", text);
+    ts4 = wrapper3.tokenStream("moreSpecial", text);
+    assertSame(ts3, ts4);
+    assertSame(ts2, ts3);
+  }
+
   public void testCharFilters() throws Exception {
     Analyzer a = new Analyzer() {
       @Override
org/apache/lucene/analysis/AnalyzerWrapper.java

@@ -32,18 +32,6 @@ import java.io.Reader;
  */
 public abstract class AnalyzerWrapper extends Analyzer {

-  /**
-   * Creates a new AnalyzerWrapper. Since the {@link Analyzer.ReuseStrategy} of
-   * the wrapped Analyzers are unknown, {@link #PER_FIELD_REUSE_STRATEGY} is assumed.
-   * @deprecated Use {@link #AnalyzerWrapper(Analyzer.ReuseStrategy)}
-   * and specify a valid {@link Analyzer.ReuseStrategy}, probably retrieved from the
-   * wrapped analyzer using {@link #getReuseStrategy()}.
-   */
-  @Deprecated
-  protected AnalyzerWrapper() {
-    this(PER_FIELD_REUSE_STRATEGY);
-  }
-
   /**
    * Creates a new AnalyzerWrapper with the given reuse strategy.
    * <p>If you want to wrap a single delegate Analyzer you can probably
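The removed no-arg constructor silently assumed PER_FIELD_REUSE_STRATEGY; wrappers must now pass a strategy explicitly. A hedged sketch of the pattern the deprecation note recommended, reusing the delegate's own strategy (the class and helper names here are made up for illustration):

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.AnalyzerWrapper;

public final class WrapperStrategyExample {
  // Wrap a single delegate, reusing its ReuseStrategy so cached
  // TokenStreamComponents are keyed consistently with the delegate.
  public static Analyzer wrap(final Analyzer delegate) {
    return new AnalyzerWrapper(delegate.getReuseStrategy()) {
      @Override
      protected Analyzer getWrappedAnalyzer(String fieldName) {
        return delegate;
      }
    };
  }
}

After this commit, a wrapper that purely delegates (no component or reader wrapping) should extend DelegatingAnalyzerWrapper instead, as the remaining hunks below do.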
org/apache/lucene/analysis/DelegatingAnalyzerWrapper.java (new file)

@@ -0,0 +1,92 @@
+package org.apache.lucene.analysis;
+
+import java.io.Reader;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * An analyzer wrapper that doesn't allow wrapping of components or readers.
+ * Because wrapping is disallowed, the thread-local resources can be delegated
+ * to the wrapped analyzer instead of also being allocated on this analyzer.
+ * This wrapper class is the base class of all analyzers that just delegate to
+ * another analyzer, e.g. per field name.
+ *
+ * <p>This solves the problem that PerFieldAnalyzerWrapper used to maintain its
+ * own thread-local per-field TokenStreamComponents although it could safely
+ * delegate them, duplicating data structures that can become expensive
+ * memory-wise.
+ *
+ * <p><b>Please note:</b> This analyzer uses a private {@link Analyzer.ReuseStrategy},
+ * which is returned by {@link #getReuseStrategy()}. This strategy is used when
+ * delegating. If you wrap this analyzer again and reuse this strategy, no
+ * delegation is done and the given fallback is used.
+ */
+public abstract class DelegatingAnalyzerWrapper extends AnalyzerWrapper {
+
+  /**
+   * Constructor.
+   * @param fallbackStrategy the strategy to use if delegation is not possible.
+   *  This is to support the common pattern:
+   *  {@code new OtherWrapper(thisWrapper.getReuseStrategy())}
+   */
+  protected DelegatingAnalyzerWrapper(ReuseStrategy fallbackStrategy) {
+    super(new DelegatingReuseStrategy(fallbackStrategy));
+    // häckidy-hick-hack, because we cannot call super() with a reference to "this":
+    ((DelegatingReuseStrategy) getReuseStrategy()).wrapper = this;
+  }
+
+  @Override
+  protected final TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
+    return super.wrapComponents(fieldName, components);
+  }
+
+  @Override
+  protected final Reader wrapReader(String fieldName, Reader reader) {
+    return super.wrapReader(fieldName, reader);
+  }
+
+  private static final class DelegatingReuseStrategy extends ReuseStrategy {
+    DelegatingAnalyzerWrapper wrapper;
+    private final ReuseStrategy fallbackStrategy;
+
+    DelegatingReuseStrategy(ReuseStrategy fallbackStrategy) {
+      this.fallbackStrategy = fallbackStrategy;
+    }
+
+    @Override
+    public TokenStreamComponents getReusableComponents(Analyzer analyzer, String fieldName) {
+      if (analyzer == wrapper) {
+        final Analyzer wrappedAnalyzer = wrapper.getWrappedAnalyzer(fieldName);
+        return wrappedAnalyzer.getReuseStrategy().getReusableComponents(wrappedAnalyzer, fieldName);
+      } else {
+        return fallbackStrategy.getReusableComponents(analyzer, fieldName);
+      }
+    }
+
+    @Override
+    public void setReusableComponents(Analyzer analyzer, String fieldName, TokenStreamComponents components) {
+      if (analyzer == wrapper) {
+        final Analyzer wrappedAnalyzer = wrapper.getWrappedAnalyzer(fieldName);
+        wrappedAnalyzer.getReuseStrategy().setReusableComponents(wrappedAnalyzer, fieldName, components);
+      } else {
+        fallbackStrategy.setReusableComponents(analyzer, fieldName, components);
+      }
+    }
+  };
+
+}
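For the subclassing pattern this new class enables, a minimal sketch (the class name, fields, and suffix rule are hypothetical, not from this commit): only getWrappedAnalyzer needs to be implemented, and the private DelegatingReuseStrategy routes component caching to whichever analyzer is returned.

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.DelegatingAnalyzerWrapper;

// Hypothetical wrapper: fields ending in "_en" get the English analyzer,
// everything else gets the default analyzer.
final class SuffixDelegatingAnalyzer extends DelegatingAnalyzerWrapper {
  private final Analyzer englishAnalyzer;
  private final Analyzer defaultAnalyzer;

  SuffixDelegatingAnalyzer(Analyzer englishAnalyzer, Analyzer defaultAnalyzer) {
    // The fallback only matters if a non-delegating wrapper wraps this one
    // and reuses this strategy, as testReuseWrapped above demonstrates.
    super(Analyzer.PER_FIELD_REUSE_STRATEGY);
    this.englishAnalyzer = englishAnalyzer;
    this.defaultAnalyzer = defaultAnalyzer;
  }

  @Override
  protected Analyzer getWrappedAnalyzer(String fieldName) {
    return fieldName.endsWith("_en") ? englishAnalyzer : defaultAnalyzer;
  }
}

Since wrapComponents and wrapReader are final passthroughs, a subclass cannot accidentally reintroduce per-wrapper state; that is what makes it safe for the strategy to hand caching duties to the wrapped analyzer.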
org/apache/lucene/analysis/TestMockAnalyzer.java

@@ -273,11 +273,6 @@ public class TestMockAnalyzer extends BaseTokenStreamTestCase {
         return new MockCharFilter(reader, 7);
       }

-      @Override
-      protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
-        return components;
-      }
-
       @Override
       protected Analyzer getWrappedAnalyzer(String fieldName) {
         return delegate;
@@ -292,7 +287,7 @@ public class TestMockAnalyzer extends BaseTokenStreamTestCase {
     final int positionGap = random().nextInt(1000);
     final int offsetGap = random().nextInt(1000);
     final Analyzer delegate = new MockAnalyzer(random());
-    final Analyzer a = new AnalyzerWrapper(delegate.getReuseStrategy()) {
+    final Analyzer a = new DelegatingAnalyzerWrapper(delegate.getReuseStrategy()) {
       @Override
       protected Analyzer getWrappedAnalyzer(String fieldName) {
         return delegate;
org/apache/lucene/search/vectorhighlight/FastVectorHighlighterTest.java

@@ -22,8 +22,8 @@ import java.util.Set;
 import java.util.TreeMap;

 import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.AnalyzerWrapper;
 import org.apache.lucene.analysis.CannedTokenStream;
+import org.apache.lucene.analysis.DelegatingAnalyzerWrapper;
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.analysis.MockTokenFilter;
 import org.apache.lucene.analysis.MockTokenizer;
@@ -606,7 +606,7 @@ public class FastVectorHighlighterTest extends LuceneTestCase {
     fieldAnalyzers.put( "field_tripples", new MockAnalyzer( random(), new CharacterRunAutomaton( new RegExp("...").toAutomaton() ), true ) );
     fieldAnalyzers.put( "field_sliced", fieldAnalyzers.get( "field" ) );
     fieldAnalyzers.put( "field_der_red", fieldAnalyzers.get( "field" ) );  // This is required even though we provide a token stream
-    Analyzer analyzer = new AnalyzerWrapper() {
+    Analyzer analyzer = new DelegatingAnalyzerWrapper(Analyzer.PER_FIELD_REUSE_STRATEGY) {
       public Analyzer getWrappedAnalyzer(String fieldName) {
         return fieldAnalyzers.get( fieldName );
       }
org/apache/solr/schema/IndexSchema.java

@@ -18,8 +18,7 @@
 package org.apache.solr.schema;

 import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.AnalyzerWrapper;
-import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.analysis.DelegatingAnalyzerWrapper;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexableField;
@@ -397,7 +396,7 @@ public class IndexSchema {
     return false;
   }

-  private class SolrIndexAnalyzer extends AnalyzerWrapper {
+  private class SolrIndexAnalyzer extends DelegatingAnalyzerWrapper {
     protected final HashMap<String, Analyzer> analyzers;

     SolrIndexAnalyzer() {
org/apache/solr/schema/PreAnalyzedField.java

@@ -61,6 +61,7 @@ public class PreAnalyzedField extends FieldType {


   private PreAnalyzedParser parser;
+  private Analyzer analyzer;

   @Override
   public void init(IndexSchema schema, Map<String, String> args) {
@@ -87,19 +88,19 @@ public class PreAnalyzedField extends FieldType {
       }
       args.remove(PARSER_IMPL);
     }
-  }
-
-  @Override
-  public Analyzer getIndexAnalyzer() {
-    return new SolrAnalyzer() {
+    // create Analyzer instance for reuse:
+    analyzer = new SolrAnalyzer() {
       @Override
       protected TokenStreamComponents createComponents(String fieldName) {
         return new TokenStreamComponents(new PreAnalyzedTokenizer(parser));
       }
-
     };
+  }
+
+  @Override
+  public Analyzer getIndexAnalyzer() {
+    return analyzer;
+  }

   @Override
   public Analyzer getQueryAnalyzer() {
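A note on this last change: Analyzer caches per-thread TokenStreamComponents keyed on the analyzer instance, so the old code, which constructed a fresh SolrAnalyzer on every getIndexAnalyzer() call, could never benefit from reuse. Creating the instance once in init() and returning the cached field lets the reuse machinery, including the new delegating strategy in SolrIndexAnalyzer, work as intended.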