mirror of https://github.com/apache/lucene.git
SOLR-7765: Hardened the behavior of TokenizerChain when null arguments are used in constructor. This prevents NPEs in some code paths.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1692170 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
585f8a7b26
commit
9e61daf0fa
|
@ -239,6 +239,9 @@ Bug Fixes
|
|||
* SOLR-7810: map-reduce contrib script to set classpath for convenience refers to example
|
||||
rather than server. (Mark Miller)
|
||||
|
||||
* SOLR-7765: Hardened the behavior of TokenizerChain when null arguments are used in constructor.
|
||||
This prevents NPEs in some code paths. (Konstantin Gribov, hossman)
|
||||
|
||||
Optimizations
|
||||
----------------------
|
||||
|
||||
|
|
|
@ -29,22 +29,47 @@ import java.io.Reader;
|
|||
* create a TokenStream.
|
||||
*/
|
||||
public final class TokenizerChain extends SolrAnalyzer {
|
||||
private static final CharFilterFactory[] EMPTY_CHAR_FITLERS = new CharFilterFactory[0];
|
||||
private static final TokenFilterFactory[] EMPTY_TOKEN_FITLERS = new TokenFilterFactory[0];
|
||||
|
||||
final private CharFilterFactory[] charFilters;
|
||||
final private TokenizerFactory tokenizer;
|
||||
final private TokenFilterFactory[] filters;
|
||||
|
||||
/**
|
||||
* Creates a new TokenizerChain w/o any CharFilterFactories.
|
||||
*
|
||||
* @param tokenizer Factory for the Tokenizer to use, must not be null.
|
||||
* @param filters Factories for the TokenFilters to use - if null, will be treated as if empty.
|
||||
*/
|
||||
public TokenizerChain(TokenizerFactory tokenizer, TokenFilterFactory[] filters) {
|
||||
this(null,tokenizer,filters);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a new TokenizerChain.
|
||||
*
|
||||
* @param charFilters Factories for the CharFilters to use, if any - if null, will be treated as if empty.
|
||||
* @param tokenizer Factory for the Tokenizer to use, must not be null.
|
||||
* @param filters Factories for the TokenFilters to use if any- if null, will be treated as if empty.
|
||||
*/
|
||||
public TokenizerChain(CharFilterFactory[] charFilters, TokenizerFactory tokenizer, TokenFilterFactory[] filters) {
|
||||
charFilters = null == charFilters ? EMPTY_CHAR_FITLERS : charFilters;
|
||||
filters = null == filters ? EMPTY_TOKEN_FITLERS : filters;
|
||||
if (null == tokenizer) {
|
||||
throw new NullPointerException("TokenizerFactory must not be null");
|
||||
}
|
||||
|
||||
this.charFilters = charFilters;
|
||||
this.tokenizer = tokenizer;
|
||||
this.filters = filters;
|
||||
}
|
||||
|
||||
/** @return array of CharFilterFactories, may be empty but never null */
|
||||
public CharFilterFactory[] getCharFilterFactories() { return charFilters; }
|
||||
/** @return the TokenizerFactory in use, will never be null */
|
||||
public TokenizerFactory getTokenizerFactory() { return tokenizer; }
|
||||
/** @return array of TokenFilterFactories, may be empty but never null */
|
||||
public TokenFilterFactory[] getTokenFilterFactories() { return filters; }
|
||||
|
||||
@Override
|
||||
|
|
|
@ -104,7 +104,7 @@ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase {
|
|||
|
||||
NamedList<Object> namedList = new NamedList<>();
|
||||
|
||||
if( cfiltfacs != null ){
|
||||
if (0 < cfiltfacs.length) {
|
||||
String source = value;
|
||||
for(CharFilterFactory cfiltfac : cfiltfacs ){
|
||||
Reader reader = new StringReader(source);
|
||||
|
|
|
@ -497,15 +497,15 @@ public class LukeRequestHandler extends RequestHandlerBase
|
|||
TokenizerChain tchain = (TokenizerChain)analyzer;
|
||||
|
||||
CharFilterFactory[] cfiltfacs = tchain.getCharFilterFactories();
|
||||
SimpleOrderedMap<Map<String, Object>> cfilters = new SimpleOrderedMap<>();
|
||||
for (CharFilterFactory cfiltfac : cfiltfacs) {
|
||||
Map<String, Object> tok = new HashMap<>();
|
||||
String className = cfiltfac.getClass().getName();
|
||||
tok.put("className", className);
|
||||
tok.put("args", cfiltfac.getOriginalArgs());
|
||||
cfilters.add(className.substring(className.lastIndexOf('.')+1), tok);
|
||||
}
|
||||
if (cfilters.size() > 0) {
|
||||
if (0 < cfiltfacs.length) {
|
||||
SimpleOrderedMap<Map<String, Object>> cfilters = new SimpleOrderedMap<>();
|
||||
for (CharFilterFactory cfiltfac : cfiltfacs) {
|
||||
Map<String, Object> tok = new HashMap<>();
|
||||
String className = cfiltfac.getClass().getName();
|
||||
tok.put("className", className);
|
||||
tok.put("args", cfiltfac.getOriginalArgs());
|
||||
cfilters.add(className.substring(className.lastIndexOf('.')+1), tok);
|
||||
}
|
||||
aninfo.add("charFilters", cfilters);
|
||||
}
|
||||
|
||||
|
@ -516,15 +516,15 @@ public class LukeRequestHandler extends RequestHandlerBase
|
|||
aninfo.add("tokenizer", tokenizer);
|
||||
|
||||
TokenFilterFactory[] filtfacs = tchain.getTokenFilterFactories();
|
||||
SimpleOrderedMap<Map<String, Object>> filters = new SimpleOrderedMap<>();
|
||||
for (TokenFilterFactory filtfac : filtfacs) {
|
||||
Map<String, Object> tok = new HashMap<>();
|
||||
String className = filtfac.getClass().getName();
|
||||
tok.put("className", className);
|
||||
tok.put("args", filtfac.getOriginalArgs());
|
||||
filters.add(className.substring(className.lastIndexOf('.')+1), tok);
|
||||
}
|
||||
if (filters.size() > 0) {
|
||||
if (0 < filtfacs.length) {
|
||||
SimpleOrderedMap<Map<String, Object>> filters = new SimpleOrderedMap<>();
|
||||
for (TokenFilterFactory filtfac : filtfacs) {
|
||||
Map<String, Object> tok = new HashMap<>();
|
||||
String className = filtfac.getClass().getName();
|
||||
tok.put("className", className);
|
||||
tok.put("args", filtfac.getOriginalArgs());
|
||||
filters.add(className.substring(className.lastIndexOf('.')+1), tok);
|
||||
}
|
||||
aninfo.add("filters", filters);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -883,7 +883,7 @@ public abstract class FieldType extends FieldProperties {
|
|||
Map<String,String> factoryArgs;
|
||||
TokenizerChain tokenizerChain = (TokenizerChain)analyzer;
|
||||
CharFilterFactory[] charFilterFactories = tokenizerChain.getCharFilterFactories();
|
||||
if (null != charFilterFactories && charFilterFactories.length > 0) {
|
||||
if (0 < charFilterFactories.length) {
|
||||
List<SimpleOrderedMap<Object>> charFilterProps = new ArrayList<>();
|
||||
for (CharFilterFactory charFilterFactory : charFilterFactories) {
|
||||
SimpleOrderedMap<Object> props = new SimpleOrderedMap<>();
|
||||
|
@ -927,7 +927,7 @@ public abstract class FieldType extends FieldProperties {
|
|||
analyzerProps.add(TOKENIZER, tokenizerProps);
|
||||
|
||||
TokenFilterFactory[] filterFactories = tokenizerChain.getTokenFilterFactories();
|
||||
if (null != filterFactories && filterFactories.length > 0) {
|
||||
if (0 < filterFactories.length) {
|
||||
List<SimpleOrderedMap<Object>> filterProps = new ArrayList<>();
|
||||
for (TokenFilterFactory filterFactory : filterFactories) {
|
||||
SimpleOrderedMap<Object> props = new SimpleOrderedMap<>();
|
||||
|
|
|
@ -175,10 +175,8 @@ public final class FieldTypePluginLoader
|
|||
MultiTermChainBuilder builder = new MultiTermChainBuilder();
|
||||
|
||||
CharFilterFactory[] charFactories = tc.getCharFilterFactories();
|
||||
if (charFactories != null) {
|
||||
for (CharFilterFactory fact : charFactories) {
|
||||
builder.add(fact);
|
||||
}
|
||||
for (CharFilterFactory fact : charFactories) {
|
||||
builder.add(fact);
|
||||
}
|
||||
|
||||
builder.add(tc.getTokenizerFactory());
|
||||
|
|
|
@ -1285,20 +1285,18 @@ public final class ManagedIndexSchema extends IndexSchema {
|
|||
*/
|
||||
protected void informResourceLoaderAwareObjectsInChain(TokenizerChain chain) {
|
||||
CharFilterFactory[] charFilters = chain.getCharFilterFactories();
|
||||
if (charFilters != null) {
|
||||
for (CharFilterFactory next : charFilters) {
|
||||
if (next instanceof ResourceLoaderAware) {
|
||||
try {
|
||||
((ResourceLoaderAware) next).inform(loader);
|
||||
} catch (IOException e) {
|
||||
throw new SolrException(ErrorCode.SERVER_ERROR, e);
|
||||
}
|
||||
for (CharFilterFactory next : charFilters) {
|
||||
if (next instanceof ResourceLoaderAware) {
|
||||
try {
|
||||
((ResourceLoaderAware) next).inform(loader);
|
||||
} catch (IOException e) {
|
||||
throw new SolrException(ErrorCode.SERVER_ERROR, e);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TokenizerFactory tokenizerFactory = chain.getTokenizerFactory();
|
||||
if (tokenizerFactory != null && tokenizerFactory instanceof ResourceLoaderAware) {
|
||||
if (tokenizerFactory instanceof ResourceLoaderAware) {
|
||||
try {
|
||||
((ResourceLoaderAware) tokenizerFactory).inform(loader);
|
||||
} catch (IOException e) {
|
||||
|
@ -1307,14 +1305,12 @@ public final class ManagedIndexSchema extends IndexSchema {
|
|||
}
|
||||
|
||||
TokenFilterFactory[] filters = chain.getTokenFilterFactories();
|
||||
if (filters != null) {
|
||||
for (TokenFilterFactory next : filters) {
|
||||
if (next instanceof ResourceLoaderAware) {
|
||||
try {
|
||||
((ResourceLoaderAware) next).inform(loader);
|
||||
} catch (IOException e) {
|
||||
throw new SolrException(ErrorCode.SERVER_ERROR, e);
|
||||
}
|
||||
for (TokenFilterFactory next : filters) {
|
||||
if (next instanceof ResourceLoaderAware) {
|
||||
try {
|
||||
((ResourceLoaderAware) next).inform(loader);
|
||||
} catch (IOException e) {
|
||||
throw new SolrException(ErrorCode.SERVER_ERROR, e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,27 @@
|
|||
<?xml version="1.0" ?>
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
<schema name="test" version="1.4">
|
||||
<fieldType name="string" class="solr.StrField"/>
|
||||
|
||||
<!-- field type with custom TokenizerChain analyzer (created with 2-arg constructor, see SOLR-7765) -->
|
||||
<fieldType name="custom_tc_string" class="solr.CustomAnalyzerStrField"/>
|
||||
|
||||
|
||||
<dynamicField name="*" type="string" indexed="true" stored="true" />
|
||||
</schema>
|
|
@ -20,6 +20,7 @@ package org.apache.solr.handler.admin;
|
|||
import org.apache.solr.common.luke.FieldFlag;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.apache.solr.schema.IndexSchema;
|
||||
import org.apache.solr.schema.CustomAnalyzerStrField; // jdoc
|
||||
import org.apache.solr.util.AbstractSolrTestCase;
|
||||
import org.apache.solr.util.TestHarness;
|
||||
import org.junit.Before;
|
||||
|
@ -198,6 +199,27 @@ public class LukeRequestHandlerTest extends AbstractSolrTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
/** @see CustomAnalyzerStrField */
|
||||
public void testNullFactories() throws Exception {
|
||||
deleteCore();
|
||||
initCore("solrconfig.xml", "schema-null-charfilters-analyzer.xml");
|
||||
|
||||
try {
|
||||
assertQ(req("qt", "/admin/luke", "show", "schema")
|
||||
, "//lst[@name='custom_tc_string']/lst[@name='indexAnalyzer']"
|
||||
, "//lst[@name='custom_tc_string']/lst[@name='queryAnalyzer']"
|
||||
, "0=count(//lst[@name='custom_tc_string']/lst[@name='indexAnalyzer']/lst[@name='filters'])"
|
||||
, "0=count(//lst[@name='custom_tc_string']/lst[@name='queryAnalyzer']/lst[@name='filters'])"
|
||||
, "0=count(//lst[@name='custom_tc_string']/lst[@name='indexAnalyzer']/lst[@name='charFilters'])"
|
||||
, "0=count(//lst[@name='custom_tc_string']/lst[@name='queryAnalyzer']/lst[@name='charFilters'])"
|
||||
);
|
||||
} finally {
|
||||
// Put back the configuration expected by the rest of the tests in this suite
|
||||
deleteCore();
|
||||
initCore("solrconfig.xml", "schema12.xml");
|
||||
}
|
||||
}
|
||||
|
||||
public void testCopyFieldLists() throws Exception {
|
||||
SolrQueryRequest req = req("qt", "/admin/luke", "show", "schema");
|
||||
|
||||
|
|
|
@ -0,0 +1,74 @@
|
|||
package org.apache.solr.schema;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Random;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.core.KeywordTokenizerFactory;
|
||||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||
import org.apache.lucene.analysis.util.CharFilterFactory;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
|
||||
import org.apache.solr.analysis.TokenizerChain;
|
||||
import org.apache.solr.handler.admin.LukeRequestHandlerTest; // jdoc
|
||||
|
||||
/**
|
||||
* A Test only custom FieldType that specifies null for various params when constructing
|
||||
* TokenizerChain instances to ensure that they are still well behaved.
|
||||
*
|
||||
* @see LukeRequestHandlerTest#testNullFactories
|
||||
*/
|
||||
public class CustomAnalyzerStrField extends StrField {
|
||||
private final Analyzer indexAnalyzer;
|
||||
private final Analyzer queryAnalyzer;
|
||||
|
||||
public CustomAnalyzerStrField() {
|
||||
Random r = LuceneTestCase.random();
|
||||
|
||||
// two arg constructor
|
||||
Analyzer a2 = new TokenizerChain
|
||||
(new KeywordTokenizerFactory(new HashMap<>()),
|
||||
r.nextBoolean() ? null : new TokenFilterFactory[0]);
|
||||
|
||||
// three arg constructor
|
||||
Analyzer a3 = new TokenizerChain
|
||||
(r.nextBoolean() ? null : new CharFilterFactory[0],
|
||||
new KeywordTokenizerFactory(new HashMap<>()),
|
||||
r.nextBoolean() ? null : new TokenFilterFactory[0]);
|
||||
|
||||
if (r.nextBoolean()) {
|
||||
indexAnalyzer = a2;
|
||||
queryAnalyzer = a3;
|
||||
} else {
|
||||
queryAnalyzer = a2;
|
||||
indexAnalyzer = a3;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Analyzer getIndexAnalyzer() {
|
||||
return indexAnalyzer;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Analyzer getQueryAnalyzer() {
|
||||
return queryAnalyzer;
|
||||
}
|
||||
}
|
|
@ -88,7 +88,7 @@ public class MultiTermTest extends SolrTestCaseJ4 {
|
|||
assertTrue((factory instanceof ASCIIFoldingFilterFactory) || (factory instanceof LowerCaseFilterFactory));
|
||||
}
|
||||
|
||||
assertTrue(tc.getCharFilterFactories() == null);
|
||||
assertTrue(tc.getCharFilterFactories().length == 0);
|
||||
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue