SOLR-7765: Hardened the behavior of TokenizerChain when null arguments are used in constructor. This prevents NPEs in some code paths.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1692170 13f79535-47bb-0310-9956-ffa450edef68
Chris M. Hostetter 2015-07-21 16:49:37 +00:00
parent 585f8a7b26
commit 9e61daf0fa
11 changed files with 189 additions and 44 deletions

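For orientation before the per-file diffs, here is a minimal sketch (not part of the commit) of the contract this change establishes: null filter arrays passed to the TokenizerChain constructors are treated as empty, while a null TokenizerFactory fails fast. The demo class name is hypothetical; the constructors and getters are the ones patched below, and the KeywordTokenizerFactory usage mirrors the commit's own test code.

import java.util.HashMap;

import org.apache.lucene.analysis.core.KeywordTokenizerFactory;
import org.apache.solr.analysis.TokenizerChain;

public class TokenizerChainNullDemo {
  public static void main(String[] args) {
    // Null filter arrays are treated as if empty, so the getters never return null.
    TokenizerChain chain = new TokenizerChain(
        null,                                          // char filters: treated as empty
        new KeywordTokenizerFactory(new HashMap<>()),  // tokenizer: required
        null);                                         // token filters: treated as empty
    System.out.println(chain.getCharFilterFactories().length);  // prints 0
    System.out.println(chain.getTokenFilterFactories().length); // prints 0

    // A null TokenizerFactory now fails fast instead of causing a later NPE.
    try {
      new TokenizerChain(null, null); // two-arg form: (tokenizer, filters)
    } catch (NullPointerException expected) {
      System.out.println(expected.getMessage()); // "TokenizerFactory must not be null"
    }
  }
}
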
View File

@@ -239,6 +239,9 @@ Bug Fixes

* SOLR-7810: map-reduce contrib script to set classpath for convenience refers to example
  rather than server. (Mark Miller)

+* SOLR-7765: Hardened the behavior of TokenizerChain when null arguments are used in constructor.
+  This prevents NPEs in some code paths. (Konstantin Gribov, hossman)
+
Optimizations
----------------------

View File

@@ -29,22 +29,47 @@ import java.io.Reader;
 * create a TokenStream.
 */
public final class TokenizerChain extends SolrAnalyzer {
+  private static final CharFilterFactory[] EMPTY_CHAR_FILTERS = new CharFilterFactory[0];
+  private static final TokenFilterFactory[] EMPTY_TOKEN_FILTERS = new TokenFilterFactory[0];

  final private CharFilterFactory[] charFilters;
  final private TokenizerFactory tokenizer;
  final private TokenFilterFactory[] filters;

+  /**
+   * Creates a new TokenizerChain w/o any CharFilterFactories.
+   *
+   * @param tokenizer Factory for the Tokenizer to use, must not be null.
+   * @param filters Factories for the TokenFilters to use - if null, will be treated as if empty.
+   */
  public TokenizerChain(TokenizerFactory tokenizer, TokenFilterFactory[] filters) {
    this(null, tokenizer, filters);
  }

+  /**
+   * Creates a new TokenizerChain.
+   *
+   * @param charFilters Factories for the CharFilters to use, if any - if null, will be treated as if empty.
+   * @param tokenizer Factory for the Tokenizer to use, must not be null.
+   * @param filters Factories for the TokenFilters to use, if any - if null, will be treated as if empty.
+   */
  public TokenizerChain(CharFilterFactory[] charFilters, TokenizerFactory tokenizer, TokenFilterFactory[] filters) {
+    charFilters = null == charFilters ? EMPTY_CHAR_FILTERS : charFilters;
+    filters = null == filters ? EMPTY_TOKEN_FILTERS : filters;
+    if (null == tokenizer) {
+      throw new NullPointerException("TokenizerFactory must not be null");
+    }
    this.charFilters = charFilters;
    this.tokenizer = tokenizer;
    this.filters = filters;
  }

+  /** @return array of CharFilterFactories, may be empty but never null */
  public CharFilterFactory[] getCharFilterFactories() { return charFilters; }
+  /** @return the TokenizerFactory in use, will never be null */
  public TokenizerFactory getTokenizerFactory() { return tokenizer; }
+  /** @return array of TokenFilterFactories, may be empty but never null */
  public TokenFilterFactory[] getTokenFilterFactories() { return filters; }

  @Override

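The non-null guarantees documented on the getters above are what allow the callers patched in the remaining files to drop their null guards. A hypothetical helper, sketched here for illustration only, can now iterate the factory arrays directly:

import org.apache.lucene.analysis.util.CharFilterFactory;
import org.apache.lucene.analysis.util.TokenFilterFactory;
import org.apache.solr.analysis.TokenizerChain;

// Hypothetical helper class, for illustration only.
public class ChainPrinter {
  // With the hardened contract, no null checks are needed before iterating.
  static void printChain(TokenizerChain tc) {
    for (CharFilterFactory cf : tc.getCharFilterFactories()) {   // never null
      System.out.println("charFilter: " + cf.getClass().getSimpleName());
    }
    System.out.println("tokenizer: " + tc.getTokenizerFactory().getClass().getSimpleName());
    for (TokenFilterFactory tf : tc.getTokenFilterFactories()) { // never null
      System.out.println("filter: " + tf.getClass().getSimpleName());
    }
  }
}
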
View File

@@ -104,7 +104,7 @@ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase {
NamedList<Object> namedList = new NamedList<>();

-if( cfiltfacs != null ){
+if (0 < cfiltfacs.length) {
  String source = value;
  for(CharFilterFactory cfiltfac : cfiltfacs ){
    Reader reader = new StringReader(source);

View File

@@ -497,15 +497,15 @@ public class LukeRequestHandler extends RequestHandlerBase
TokenizerChain tchain = (TokenizerChain)analyzer;

CharFilterFactory[] cfiltfacs = tchain.getCharFilterFactories();
-SimpleOrderedMap<Map<String, Object>> cfilters = new SimpleOrderedMap<>();
-for (CharFilterFactory cfiltfac : cfiltfacs) {
-  Map<String, Object> tok = new HashMap<>();
-  String className = cfiltfac.getClass().getName();
-  tok.put("className", className);
-  tok.put("args", cfiltfac.getOriginalArgs());
-  cfilters.add(className.substring(className.lastIndexOf('.')+1), tok);
-}
-if (cfilters.size() > 0) {
+if (0 < cfiltfacs.length) {
+  SimpleOrderedMap<Map<String, Object>> cfilters = new SimpleOrderedMap<>();
+  for (CharFilterFactory cfiltfac : cfiltfacs) {
+    Map<String, Object> tok = new HashMap<>();
+    String className = cfiltfac.getClass().getName();
+    tok.put("className", className);
+    tok.put("args", cfiltfac.getOriginalArgs());
+    cfilters.add(className.substring(className.lastIndexOf('.')+1), tok);
+  }
  aninfo.add("charFilters", cfilters);
}
@@ -516,15 +516,15 @@ public class LukeRequestHandler extends RequestHandlerBase
aninfo.add("tokenizer", tokenizer);

TokenFilterFactory[] filtfacs = tchain.getTokenFilterFactories();
-SimpleOrderedMap<Map<String, Object>> filters = new SimpleOrderedMap<>();
-for (TokenFilterFactory filtfac : filtfacs) {
-  Map<String, Object> tok = new HashMap<>();
-  String className = filtfac.getClass().getName();
-  tok.put("className", className);
-  tok.put("args", filtfac.getOriginalArgs());
-  filters.add(className.substring(className.lastIndexOf('.')+1), tok);
-}
-if (filters.size() > 0) {
+if (0 < filtfacs.length) {
+  SimpleOrderedMap<Map<String, Object>> filters = new SimpleOrderedMap<>();
+  for (TokenFilterFactory filtfac : filtfacs) {
+    Map<String, Object> tok = new HashMap<>();
+    String className = filtfac.getClass().getName();
+    tok.put("className", className);
+    tok.put("args", filtfac.getOriginalArgs());
+    filters.add(className.substring(className.lastIndexOf('.')+1), tok);
+  }
  aninfo.add("filters", filters);
}
}

View File

@@ -883,7 +883,7 @@ public abstract class FieldType extends FieldProperties {
Map<String,String> factoryArgs;
TokenizerChain tokenizerChain = (TokenizerChain)analyzer;

CharFilterFactory[] charFilterFactories = tokenizerChain.getCharFilterFactories();
-if (null != charFilterFactories && charFilterFactories.length > 0) {
+if (0 < charFilterFactories.length) {
  List<SimpleOrderedMap<Object>> charFilterProps = new ArrayList<>();
  for (CharFilterFactory charFilterFactory : charFilterFactories) {
    SimpleOrderedMap<Object> props = new SimpleOrderedMap<>();
@@ -927,7 +927,7 @@ public abstract class FieldType extends FieldProperties {
analyzerProps.add(TOKENIZER, tokenizerProps);

TokenFilterFactory[] filterFactories = tokenizerChain.getTokenFilterFactories();
-if (null != filterFactories && filterFactories.length > 0) {
+if (0 < filterFactories.length) {
  List<SimpleOrderedMap<Object>> filterProps = new ArrayList<>();
  for (TokenFilterFactory filterFactory : filterFactories) {
    SimpleOrderedMap<Object> props = new SimpleOrderedMap<>();

View File

@@ -175,10 +175,8 @@ public final class FieldTypePluginLoader
MultiTermChainBuilder builder = new MultiTermChainBuilder();

CharFilterFactory[] charFactories = tc.getCharFilterFactories();
-if (charFactories != null) {
-  for (CharFilterFactory fact : charFactories) {
-    builder.add(fact);
-  }
-}
+for (CharFilterFactory fact : charFactories) {
+  builder.add(fact);
+}

builder.add(tc.getTokenizerFactory());

View File

@@ -1285,20 +1285,18 @@ public final class ManagedIndexSchema extends IndexSchema {
 */
protected void informResourceLoaderAwareObjectsInChain(TokenizerChain chain) {
  CharFilterFactory[] charFilters = chain.getCharFilterFactories();
-  if (charFilters != null) {
-    for (CharFilterFactory next : charFilters) {
-      if (next instanceof ResourceLoaderAware) {
-        try {
-          ((ResourceLoaderAware) next).inform(loader);
-        } catch (IOException e) {
-          throw new SolrException(ErrorCode.SERVER_ERROR, e);
-        }
-      }
-    }
-  }
+  for (CharFilterFactory next : charFilters) {
+    if (next instanceof ResourceLoaderAware) {
+      try {
+        ((ResourceLoaderAware) next).inform(loader);
+      } catch (IOException e) {
+        throw new SolrException(ErrorCode.SERVER_ERROR, e);
+      }
+    }
+  }

  TokenizerFactory tokenizerFactory = chain.getTokenizerFactory();
-  if (tokenizerFactory != null && tokenizerFactory instanceof ResourceLoaderAware) {
+  if (tokenizerFactory instanceof ResourceLoaderAware) {
    try {
      ((ResourceLoaderAware) tokenizerFactory).inform(loader);
    } catch (IOException e) {
@@ -1307,14 +1305,12 @@ public final class ManagedIndexSchema extends IndexSchema {
  }

  TokenFilterFactory[] filters = chain.getTokenFilterFactories();
-  if (filters != null) {
-    for (TokenFilterFactory next : filters) {
-      if (next instanceof ResourceLoaderAware) {
-        try {
-          ((ResourceLoaderAware) next).inform(loader);
-        } catch (IOException e) {
-          throw new SolrException(ErrorCode.SERVER_ERROR, e);
-        }
-      }
-    }
-  }
+  for (TokenFilterFactory next : filters) {
+    if (next instanceof ResourceLoaderAware) {
+      try {
+        ((ResourceLoaderAware) next).inform(loader);
+      } catch (IOException e) {
+        throw new SolrException(ErrorCode.SERVER_ERROR, e);
+      }
+    }
+  }

View File

@@ -0,0 +1,27 @@
<?xml version="1.0" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<schema name="test" version="1.4">
<fieldType name="string" class="solr.StrField"/>
<!-- field type with custom TokenizerChain analyzer (created with 2-arg constructor, see SOLR-7765) -->
<fieldType name="custom_tc_string" class="solr.CustomAnalyzerStrField"/>
<dynamicField name="*" type="string" indexed="true" stored="true" />
</schema>

View File

@@ -20,6 +20,7 @@ package org.apache.solr.handler.admin;
import org.apache.solr.common.luke.FieldFlag;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.schema.IndexSchema;
+import org.apache.solr.schema.CustomAnalyzerStrField; // jdoc
import org.apache.solr.util.AbstractSolrTestCase;
import org.apache.solr.util.TestHarness;
import org.junit.Before;
@@ -198,6 +199,27 @@ public class LukeRequestHandlerTest extends AbstractSolrTestCase {
    }
  }

+  /** @see CustomAnalyzerStrField */
+  public void testNullFactories() throws Exception {
+    deleteCore();
+    initCore("solrconfig.xml", "schema-null-charfilters-analyzer.xml");
+    try {
+      assertQ(req("qt", "/admin/luke", "show", "schema")
+              , "//lst[@name='custom_tc_string']/lst[@name='indexAnalyzer']"
+              , "//lst[@name='custom_tc_string']/lst[@name='queryAnalyzer']"
+              , "0=count(//lst[@name='custom_tc_string']/lst[@name='indexAnalyzer']/lst[@name='filters'])"
+              , "0=count(//lst[@name='custom_tc_string']/lst[@name='queryAnalyzer']/lst[@name='filters'])"
+              , "0=count(//lst[@name='custom_tc_string']/lst[@name='indexAnalyzer']/lst[@name='charFilters'])"
+              , "0=count(//lst[@name='custom_tc_string']/lst[@name='queryAnalyzer']/lst[@name='charFilters'])"
+              );
+    } finally {
+      // Put back the configuration expected by the rest of the tests in this suite
+      deleteCore();
+      initCore("solrconfig.xml", "schema12.xml");
+    }
+  }
+
  public void testCopyFieldLists() throws Exception {
    SolrQueryRequest req = req("qt", "/admin/luke", "show", "schema");

View File

@@ -0,0 +1,74 @@
package org.apache.solr.schema;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.util.HashMap;
import java.util.Random;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.KeywordTokenizerFactory;
import org.apache.lucene.analysis.util.TokenFilterFactory;
import org.apache.lucene.analysis.util.CharFilterFactory;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.solr.analysis.TokenizerChain;
import org.apache.solr.handler.admin.LukeRequestHandlerTest; // jdoc

/**
 * A test-only custom FieldType that specifies null for various params when constructing
 * TokenizerChain instances to ensure that they are still well behaved.
 *
 * @see LukeRequestHandlerTest#testNullFactories
 */
public class CustomAnalyzerStrField extends StrField {
  private final Analyzer indexAnalyzer;
  private final Analyzer queryAnalyzer;

  public CustomAnalyzerStrField() {
    Random r = LuceneTestCase.random();

    // two arg constructor
    Analyzer a2 = new TokenizerChain
      (new KeywordTokenizerFactory(new HashMap<>()),
       r.nextBoolean() ? null : new TokenFilterFactory[0]);

    // three arg constructor
    Analyzer a3 = new TokenizerChain
      (r.nextBoolean() ? null : new CharFilterFactory[0],
       new KeywordTokenizerFactory(new HashMap<>()),
       r.nextBoolean() ? null : new TokenFilterFactory[0]);

    if (r.nextBoolean()) {
      indexAnalyzer = a2;
      queryAnalyzer = a3;
    } else {
      queryAnalyzer = a2;
      indexAnalyzer = a3;
    }
  }

  @Override
  public Analyzer getIndexAnalyzer() {
    return indexAnalyzer;
  }

  @Override
  public Analyzer getQueryAnalyzer() {
    return queryAnalyzer;
  }
}

View File

@@ -88,7 +88,7 @@ public class MultiTermTest extends SolrTestCaseJ4 {
  assertTrue((factory instanceof ASCIIFoldingFilterFactory) || (factory instanceof LowerCaseFilterFactory));
}

-assertTrue(tc.getCharFilterFactories() == null);
+assertTrue(tc.getCharFilterFactories().length == 0);
}
}