diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 0024f3bb7fc..f70f008ca9c 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -239,6 +239,9 @@ Bug Fixes * SOLR-7810: map-reduce contrib script to set classpath for convenience refers to example rather than server. (Mark Miller) +* SOLR-7765: Hardened the behavior of TokenizerChain when null arguments are used in constructor. + This prevents NPEs in some code paths. (Konstantin Gribov, hossman) + Optimizations ---------------------- diff --git a/solr/core/src/java/org/apache/solr/analysis/TokenizerChain.java b/solr/core/src/java/org/apache/solr/analysis/TokenizerChain.java index 2599d620393..ca51d69fbda 100644 --- a/solr/core/src/java/org/apache/solr/analysis/TokenizerChain.java +++ b/solr/core/src/java/org/apache/solr/analysis/TokenizerChain.java @@ -29,22 +29,47 @@ import java.io.Reader; * create a TokenStream. */ public final class TokenizerChain extends SolrAnalyzer { + private static final CharFilterFactory[] EMPTY_CHAR_FITLERS = new CharFilterFactory[0]; + private static final TokenFilterFactory[] EMPTY_TOKEN_FITLERS = new TokenFilterFactory[0]; + final private CharFilterFactory[] charFilters; final private TokenizerFactory tokenizer; final private TokenFilterFactory[] filters; + /** + * Creates a new TokenizerChain w/o any CharFilterFactories. + * + * @param tokenizer Factory for the Tokenizer to use, must not be null. + * @param filters Factories for the TokenFilters to use - if null, will be treated as if empty. + */ public TokenizerChain(TokenizerFactory tokenizer, TokenFilterFactory[] filters) { this(null,tokenizer,filters); } + /** + * Creates a new TokenizerChain. + * + * @param charFilters Factories for the CharFilters to use, if any - if null, will be treated as if empty. + * @param tokenizer Factory for the Tokenizer to use, must not be null. + * @param filters Factories for the TokenFilters to use if any- if null, will be treated as if empty. + */ public TokenizerChain(CharFilterFactory[] charFilters, TokenizerFactory tokenizer, TokenFilterFactory[] filters) { + charFilters = null == charFilters ? EMPTY_CHAR_FITLERS : charFilters; + filters = null == filters ? EMPTY_TOKEN_FITLERS : filters; + if (null == tokenizer) { + throw new NullPointerException("TokenizerFactory must not be null"); + } + this.charFilters = charFilters; this.tokenizer = tokenizer; this.filters = filters; } + /** @return array of CharFilterFactories, may be empty but never null */ public CharFilterFactory[] getCharFilterFactories() { return charFilters; } + /** @return the TokenizerFactory in use, will never be null */ public TokenizerFactory getTokenizerFactory() { return tokenizer; } + /** @return array of TokenFilterFactories, may be empty but never null */ public TokenFilterFactory[] getTokenFilterFactories() { return filters; } @Override diff --git a/solr/core/src/java/org/apache/solr/handler/AnalysisRequestHandlerBase.java b/solr/core/src/java/org/apache/solr/handler/AnalysisRequestHandlerBase.java index f9aa75286df..0f5b0ea7f7c 100644 --- a/solr/core/src/java/org/apache/solr/handler/AnalysisRequestHandlerBase.java +++ b/solr/core/src/java/org/apache/solr/handler/AnalysisRequestHandlerBase.java @@ -104,7 +104,7 @@ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase { NamedList namedList = new NamedList<>(); - if( cfiltfacs != null ){ + if (0 < cfiltfacs.length) { String source = value; for(CharFilterFactory cfiltfac : cfiltfacs ){ Reader reader = new StringReader(source); diff --git a/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java b/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java index 197f7092038..fe7b400b6de 100644 --- a/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java @@ -497,15 +497,15 @@ public class LukeRequestHandler extends RequestHandlerBase TokenizerChain tchain = (TokenizerChain)analyzer; CharFilterFactory[] cfiltfacs = tchain.getCharFilterFactories(); - SimpleOrderedMap> cfilters = new SimpleOrderedMap<>(); - for (CharFilterFactory cfiltfac : cfiltfacs) { - Map tok = new HashMap<>(); - String className = cfiltfac.getClass().getName(); - tok.put("className", className); - tok.put("args", cfiltfac.getOriginalArgs()); - cfilters.add(className.substring(className.lastIndexOf('.')+1), tok); - } - if (cfilters.size() > 0) { + if (0 < cfiltfacs.length) { + SimpleOrderedMap> cfilters = new SimpleOrderedMap<>(); + for (CharFilterFactory cfiltfac : cfiltfacs) { + Map tok = new HashMap<>(); + String className = cfiltfac.getClass().getName(); + tok.put("className", className); + tok.put("args", cfiltfac.getOriginalArgs()); + cfilters.add(className.substring(className.lastIndexOf('.')+1), tok); + } aninfo.add("charFilters", cfilters); } @@ -516,15 +516,15 @@ public class LukeRequestHandler extends RequestHandlerBase aninfo.add("tokenizer", tokenizer); TokenFilterFactory[] filtfacs = tchain.getTokenFilterFactories(); - SimpleOrderedMap> filters = new SimpleOrderedMap<>(); - for (TokenFilterFactory filtfac : filtfacs) { - Map tok = new HashMap<>(); - String className = filtfac.getClass().getName(); - tok.put("className", className); - tok.put("args", filtfac.getOriginalArgs()); - filters.add(className.substring(className.lastIndexOf('.')+1), tok); - } - if (filters.size() > 0) { + if (0 < filtfacs.length) { + SimpleOrderedMap> filters = new SimpleOrderedMap<>(); + for (TokenFilterFactory filtfac : filtfacs) { + Map tok = new HashMap<>(); + String className = filtfac.getClass().getName(); + tok.put("className", className); + tok.put("args", filtfac.getOriginalArgs()); + filters.add(className.substring(className.lastIndexOf('.')+1), tok); + } aninfo.add("filters", filters); } } diff --git a/solr/core/src/java/org/apache/solr/schema/FieldType.java b/solr/core/src/java/org/apache/solr/schema/FieldType.java index c23744fb307..41b89889e3b 100644 --- a/solr/core/src/java/org/apache/solr/schema/FieldType.java +++ b/solr/core/src/java/org/apache/solr/schema/FieldType.java @@ -883,7 +883,7 @@ public abstract class FieldType extends FieldProperties { Map factoryArgs; TokenizerChain tokenizerChain = (TokenizerChain)analyzer; CharFilterFactory[] charFilterFactories = tokenizerChain.getCharFilterFactories(); - if (null != charFilterFactories && charFilterFactories.length > 0) { + if (0 < charFilterFactories.length) { List> charFilterProps = new ArrayList<>(); for (CharFilterFactory charFilterFactory : charFilterFactories) { SimpleOrderedMap props = new SimpleOrderedMap<>(); @@ -927,7 +927,7 @@ public abstract class FieldType extends FieldProperties { analyzerProps.add(TOKENIZER, tokenizerProps); TokenFilterFactory[] filterFactories = tokenizerChain.getTokenFilterFactories(); - if (null != filterFactories && filterFactories.length > 0) { + if (0 < filterFactories.length) { List> filterProps = new ArrayList<>(); for (TokenFilterFactory filterFactory : filterFactories) { SimpleOrderedMap props = new SimpleOrderedMap<>(); diff --git a/solr/core/src/java/org/apache/solr/schema/FieldTypePluginLoader.java b/solr/core/src/java/org/apache/solr/schema/FieldTypePluginLoader.java index 780aa19f910..da560ea0bfa 100644 --- a/solr/core/src/java/org/apache/solr/schema/FieldTypePluginLoader.java +++ b/solr/core/src/java/org/apache/solr/schema/FieldTypePluginLoader.java @@ -175,10 +175,8 @@ public final class FieldTypePluginLoader MultiTermChainBuilder builder = new MultiTermChainBuilder(); CharFilterFactory[] charFactories = tc.getCharFilterFactories(); - if (charFactories != null) { - for (CharFilterFactory fact : charFactories) { - builder.add(fact); - } + for (CharFilterFactory fact : charFactories) { + builder.add(fact); } builder.add(tc.getTokenizerFactory()); diff --git a/solr/core/src/java/org/apache/solr/schema/ManagedIndexSchema.java b/solr/core/src/java/org/apache/solr/schema/ManagedIndexSchema.java index 042a4461411..8a0ea630da2 100644 --- a/solr/core/src/java/org/apache/solr/schema/ManagedIndexSchema.java +++ b/solr/core/src/java/org/apache/solr/schema/ManagedIndexSchema.java @@ -1285,20 +1285,18 @@ public final class ManagedIndexSchema extends IndexSchema { */ protected void informResourceLoaderAwareObjectsInChain(TokenizerChain chain) { CharFilterFactory[] charFilters = chain.getCharFilterFactories(); - if (charFilters != null) { - for (CharFilterFactory next : charFilters) { - if (next instanceof ResourceLoaderAware) { - try { - ((ResourceLoaderAware) next).inform(loader); - } catch (IOException e) { - throw new SolrException(ErrorCode.SERVER_ERROR, e); - } + for (CharFilterFactory next : charFilters) { + if (next instanceof ResourceLoaderAware) { + try { + ((ResourceLoaderAware) next).inform(loader); + } catch (IOException e) { + throw new SolrException(ErrorCode.SERVER_ERROR, e); + } } - } } TokenizerFactory tokenizerFactory = chain.getTokenizerFactory(); - if (tokenizerFactory != null && tokenizerFactory instanceof ResourceLoaderAware) { + if (tokenizerFactory instanceof ResourceLoaderAware) { try { ((ResourceLoaderAware) tokenizerFactory).inform(loader); } catch (IOException e) { @@ -1307,14 +1305,12 @@ public final class ManagedIndexSchema extends IndexSchema { } TokenFilterFactory[] filters = chain.getTokenFilterFactories(); - if (filters != null) { - for (TokenFilterFactory next : filters) { - if (next instanceof ResourceLoaderAware) { - try { - ((ResourceLoaderAware) next).inform(loader); - } catch (IOException e) { - throw new SolrException(ErrorCode.SERVER_ERROR, e); - } + for (TokenFilterFactory next : filters) { + if (next instanceof ResourceLoaderAware) { + try { + ((ResourceLoaderAware) next).inform(loader); + } catch (IOException e) { + throw new SolrException(ErrorCode.SERVER_ERROR, e); } } } diff --git a/solr/core/src/test-files/solr/collection1/conf/schema-null-charfilters-analyzer.xml b/solr/core/src/test-files/solr/collection1/conf/schema-null-charfilters-analyzer.xml new file mode 100644 index 00000000000..1ad96ca958a --- /dev/null +++ b/solr/core/src/test-files/solr/collection1/conf/schema-null-charfilters-analyzer.xml @@ -0,0 +1,27 @@ + + + + + + + + + + + + diff --git a/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerTest.java b/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerTest.java index 7467e2c149c..b16e45fbd8d 100644 --- a/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerTest.java +++ b/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerTest.java @@ -20,6 +20,7 @@ package org.apache.solr.handler.admin; import org.apache.solr.common.luke.FieldFlag; import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.schema.IndexSchema; +import org.apache.solr.schema.CustomAnalyzerStrField; // jdoc import org.apache.solr.util.AbstractSolrTestCase; import org.apache.solr.util.TestHarness; import org.junit.Before; @@ -198,6 +199,27 @@ public class LukeRequestHandlerTest extends AbstractSolrTestCase { } } + /** @see CustomAnalyzerStrField */ + public void testNullFactories() throws Exception { + deleteCore(); + initCore("solrconfig.xml", "schema-null-charfilters-analyzer.xml"); + + try { + assertQ(req("qt", "/admin/luke", "show", "schema") + , "//lst[@name='custom_tc_string']/lst[@name='indexAnalyzer']" + , "//lst[@name='custom_tc_string']/lst[@name='queryAnalyzer']" + , "0=count(//lst[@name='custom_tc_string']/lst[@name='indexAnalyzer']/lst[@name='filters'])" + , "0=count(//lst[@name='custom_tc_string']/lst[@name='queryAnalyzer']/lst[@name='filters'])" + , "0=count(//lst[@name='custom_tc_string']/lst[@name='indexAnalyzer']/lst[@name='charFilters'])" + , "0=count(//lst[@name='custom_tc_string']/lst[@name='queryAnalyzer']/lst[@name='charFilters'])" + ); + } finally { + // Put back the configuration expected by the rest of the tests in this suite + deleteCore(); + initCore("solrconfig.xml", "schema12.xml"); + } + } + public void testCopyFieldLists() throws Exception { SolrQueryRequest req = req("qt", "/admin/luke", "show", "schema"); diff --git a/solr/core/src/test/org/apache/solr/schema/CustomAnalyzerStrField.java b/solr/core/src/test/org/apache/solr/schema/CustomAnalyzerStrField.java new file mode 100644 index 00000000000..c24114a05c7 --- /dev/null +++ b/solr/core/src/test/org/apache/solr/schema/CustomAnalyzerStrField.java @@ -0,0 +1,74 @@ +package org.apache.solr.schema; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.util.HashMap; +import java.util.Random; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.core.KeywordTokenizerFactory; +import org.apache.lucene.analysis.util.TokenFilterFactory; +import org.apache.lucene.analysis.util.CharFilterFactory; +import org.apache.lucene.util.LuceneTestCase; + +import org.apache.solr.analysis.TokenizerChain; +import org.apache.solr.handler.admin.LukeRequestHandlerTest; // jdoc + +/** + * A Test only custom FieldType that specifies null for various params when constructing + * TokenizerChain instances to ensure that they are still well behaved. + * + * @see LukeRequestHandlerTest#testNullFactories + */ +public class CustomAnalyzerStrField extends StrField { + private final Analyzer indexAnalyzer; + private final Analyzer queryAnalyzer; + + public CustomAnalyzerStrField() { + Random r = LuceneTestCase.random(); + + // two arg constructor + Analyzer a2 = new TokenizerChain + (new KeywordTokenizerFactory(new HashMap<>()), + r.nextBoolean() ? null : new TokenFilterFactory[0]); + + // three arg constructor + Analyzer a3 = new TokenizerChain + (r.nextBoolean() ? null : new CharFilterFactory[0], + new KeywordTokenizerFactory(new HashMap<>()), + r.nextBoolean() ? null : new TokenFilterFactory[0]); + + if (r.nextBoolean()) { + indexAnalyzer = a2; + queryAnalyzer = a3; + } else { + queryAnalyzer = a2; + indexAnalyzer = a3; + } + } + + @Override + public Analyzer getIndexAnalyzer() { + return indexAnalyzer; + } + + @Override + public Analyzer getQueryAnalyzer() { + return queryAnalyzer; + } +} diff --git a/solr/core/src/test/org/apache/solr/schema/MultiTermTest.java b/solr/core/src/test/org/apache/solr/schema/MultiTermTest.java index ae5b59aaca9..ba8515a3da3 100644 --- a/solr/core/src/test/org/apache/solr/schema/MultiTermTest.java +++ b/solr/core/src/test/org/apache/solr/schema/MultiTermTest.java @@ -88,7 +88,7 @@ public class MultiTermTest extends SolrTestCaseJ4 { assertTrue((factory instanceof ASCIIFoldingFilterFactory) || (factory instanceof LowerCaseFilterFactory)); } - assertTrue(tc.getCharFilterFactories() == null); + assertTrue(tc.getCharFilterFactories().length == 0); } }