mirror of https://github.com/apache/lucene.git
SOLR-7765: Hardened the behavior of TokenizerChain when null arguments are used in constructor. This prevents NPEs in some code paths.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1692170 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
585f8a7b26
commit
9e61daf0fa
|
@ -239,6 +239,9 @@ Bug Fixes
|
||||||
* SOLR-7810: map-reduce contrib script to set classpath for convenience refers to example
|
* SOLR-7810: map-reduce contrib script to set classpath for convenience refers to example
|
||||||
rather than server. (Mark Miller)
|
rather than server. (Mark Miller)
|
||||||
|
|
||||||
|
* SOLR-7765: Hardened the behavior of TokenizerChain when null arguments are used in constructor.
|
||||||
|
This prevents NPEs in some code paths. (Konstantin Gribov, hossman)
|
||||||
|
|
||||||
Optimizations
|
Optimizations
|
||||||
----------------------
|
----------------------
|
||||||
|
|
||||||
|
|
|
@ -29,22 +29,47 @@ import java.io.Reader;
|
||||||
* create a TokenStream.
|
* create a TokenStream.
|
||||||
*/
|
*/
|
||||||
public final class TokenizerChain extends SolrAnalyzer {
|
public final class TokenizerChain extends SolrAnalyzer {
|
||||||
|
private static final CharFilterFactory[] EMPTY_CHAR_FITLERS = new CharFilterFactory[0];
|
||||||
|
private static final TokenFilterFactory[] EMPTY_TOKEN_FITLERS = new TokenFilterFactory[0];
|
||||||
|
|
||||||
final private CharFilterFactory[] charFilters;
|
final private CharFilterFactory[] charFilters;
|
||||||
final private TokenizerFactory tokenizer;
|
final private TokenizerFactory tokenizer;
|
||||||
final private TokenFilterFactory[] filters;
|
final private TokenFilterFactory[] filters;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a new TokenizerChain w/o any CharFilterFactories.
|
||||||
|
*
|
||||||
|
* @param tokenizer Factory for the Tokenizer to use, must not be null.
|
||||||
|
* @param filters Factories for the TokenFilters to use - if null, will be treated as if empty.
|
||||||
|
*/
|
||||||
public TokenizerChain(TokenizerFactory tokenizer, TokenFilterFactory[] filters) {
|
public TokenizerChain(TokenizerFactory tokenizer, TokenFilterFactory[] filters) {
|
||||||
this(null,tokenizer,filters);
|
this(null,tokenizer,filters);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a new TokenizerChain.
|
||||||
|
*
|
||||||
|
* @param charFilters Factories for the CharFilters to use, if any - if null, will be treated as if empty.
|
||||||
|
* @param tokenizer Factory for the Tokenizer to use, must not be null.
|
||||||
|
* @param filters Factories for the TokenFilters to use if any- if null, will be treated as if empty.
|
||||||
|
*/
|
||||||
public TokenizerChain(CharFilterFactory[] charFilters, TokenizerFactory tokenizer, TokenFilterFactory[] filters) {
|
public TokenizerChain(CharFilterFactory[] charFilters, TokenizerFactory tokenizer, TokenFilterFactory[] filters) {
|
||||||
|
charFilters = null == charFilters ? EMPTY_CHAR_FITLERS : charFilters;
|
||||||
|
filters = null == filters ? EMPTY_TOKEN_FITLERS : filters;
|
||||||
|
if (null == tokenizer) {
|
||||||
|
throw new NullPointerException("TokenizerFactory must not be null");
|
||||||
|
}
|
||||||
|
|
||||||
this.charFilters = charFilters;
|
this.charFilters = charFilters;
|
||||||
this.tokenizer = tokenizer;
|
this.tokenizer = tokenizer;
|
||||||
this.filters = filters;
|
this.filters = filters;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** @return array of CharFilterFactories, may be empty but never null */
|
||||||
public CharFilterFactory[] getCharFilterFactories() { return charFilters; }
|
public CharFilterFactory[] getCharFilterFactories() { return charFilters; }
|
||||||
|
/** @return the TokenizerFactory in use, will never be null */
|
||||||
public TokenizerFactory getTokenizerFactory() { return tokenizer; }
|
public TokenizerFactory getTokenizerFactory() { return tokenizer; }
|
||||||
|
/** @return array of TokenFilterFactories, may be empty but never null */
|
||||||
public TokenFilterFactory[] getTokenFilterFactories() { return filters; }
|
public TokenFilterFactory[] getTokenFilterFactories() { return filters; }
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -104,7 +104,7 @@ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase {
|
||||||
|
|
||||||
NamedList<Object> namedList = new NamedList<>();
|
NamedList<Object> namedList = new NamedList<>();
|
||||||
|
|
||||||
if( cfiltfacs != null ){
|
if (0 < cfiltfacs.length) {
|
||||||
String source = value;
|
String source = value;
|
||||||
for(CharFilterFactory cfiltfac : cfiltfacs ){
|
for(CharFilterFactory cfiltfac : cfiltfacs ){
|
||||||
Reader reader = new StringReader(source);
|
Reader reader = new StringReader(source);
|
||||||
|
|
|
@ -497,6 +497,7 @@ public class LukeRequestHandler extends RequestHandlerBase
|
||||||
TokenizerChain tchain = (TokenizerChain)analyzer;
|
TokenizerChain tchain = (TokenizerChain)analyzer;
|
||||||
|
|
||||||
CharFilterFactory[] cfiltfacs = tchain.getCharFilterFactories();
|
CharFilterFactory[] cfiltfacs = tchain.getCharFilterFactories();
|
||||||
|
if (0 < cfiltfacs.length) {
|
||||||
SimpleOrderedMap<Map<String, Object>> cfilters = new SimpleOrderedMap<>();
|
SimpleOrderedMap<Map<String, Object>> cfilters = new SimpleOrderedMap<>();
|
||||||
for (CharFilterFactory cfiltfac : cfiltfacs) {
|
for (CharFilterFactory cfiltfac : cfiltfacs) {
|
||||||
Map<String, Object> tok = new HashMap<>();
|
Map<String, Object> tok = new HashMap<>();
|
||||||
|
@ -505,7 +506,6 @@ public class LukeRequestHandler extends RequestHandlerBase
|
||||||
tok.put("args", cfiltfac.getOriginalArgs());
|
tok.put("args", cfiltfac.getOriginalArgs());
|
||||||
cfilters.add(className.substring(className.lastIndexOf('.')+1), tok);
|
cfilters.add(className.substring(className.lastIndexOf('.')+1), tok);
|
||||||
}
|
}
|
||||||
if (cfilters.size() > 0) {
|
|
||||||
aninfo.add("charFilters", cfilters);
|
aninfo.add("charFilters", cfilters);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -516,6 +516,7 @@ public class LukeRequestHandler extends RequestHandlerBase
|
||||||
aninfo.add("tokenizer", tokenizer);
|
aninfo.add("tokenizer", tokenizer);
|
||||||
|
|
||||||
TokenFilterFactory[] filtfacs = tchain.getTokenFilterFactories();
|
TokenFilterFactory[] filtfacs = tchain.getTokenFilterFactories();
|
||||||
|
if (0 < filtfacs.length) {
|
||||||
SimpleOrderedMap<Map<String, Object>> filters = new SimpleOrderedMap<>();
|
SimpleOrderedMap<Map<String, Object>> filters = new SimpleOrderedMap<>();
|
||||||
for (TokenFilterFactory filtfac : filtfacs) {
|
for (TokenFilterFactory filtfac : filtfacs) {
|
||||||
Map<String, Object> tok = new HashMap<>();
|
Map<String, Object> tok = new HashMap<>();
|
||||||
|
@ -524,7 +525,6 @@ public class LukeRequestHandler extends RequestHandlerBase
|
||||||
tok.put("args", filtfac.getOriginalArgs());
|
tok.put("args", filtfac.getOriginalArgs());
|
||||||
filters.add(className.substring(className.lastIndexOf('.')+1), tok);
|
filters.add(className.substring(className.lastIndexOf('.')+1), tok);
|
||||||
}
|
}
|
||||||
if (filters.size() > 0) {
|
|
||||||
aninfo.add("filters", filters);
|
aninfo.add("filters", filters);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -883,7 +883,7 @@ public abstract class FieldType extends FieldProperties {
|
||||||
Map<String,String> factoryArgs;
|
Map<String,String> factoryArgs;
|
||||||
TokenizerChain tokenizerChain = (TokenizerChain)analyzer;
|
TokenizerChain tokenizerChain = (TokenizerChain)analyzer;
|
||||||
CharFilterFactory[] charFilterFactories = tokenizerChain.getCharFilterFactories();
|
CharFilterFactory[] charFilterFactories = tokenizerChain.getCharFilterFactories();
|
||||||
if (null != charFilterFactories && charFilterFactories.length > 0) {
|
if (0 < charFilterFactories.length) {
|
||||||
List<SimpleOrderedMap<Object>> charFilterProps = new ArrayList<>();
|
List<SimpleOrderedMap<Object>> charFilterProps = new ArrayList<>();
|
||||||
for (CharFilterFactory charFilterFactory : charFilterFactories) {
|
for (CharFilterFactory charFilterFactory : charFilterFactories) {
|
||||||
SimpleOrderedMap<Object> props = new SimpleOrderedMap<>();
|
SimpleOrderedMap<Object> props = new SimpleOrderedMap<>();
|
||||||
|
@ -927,7 +927,7 @@ public abstract class FieldType extends FieldProperties {
|
||||||
analyzerProps.add(TOKENIZER, tokenizerProps);
|
analyzerProps.add(TOKENIZER, tokenizerProps);
|
||||||
|
|
||||||
TokenFilterFactory[] filterFactories = tokenizerChain.getTokenFilterFactories();
|
TokenFilterFactory[] filterFactories = tokenizerChain.getTokenFilterFactories();
|
||||||
if (null != filterFactories && filterFactories.length > 0) {
|
if (0 < filterFactories.length) {
|
||||||
List<SimpleOrderedMap<Object>> filterProps = new ArrayList<>();
|
List<SimpleOrderedMap<Object>> filterProps = new ArrayList<>();
|
||||||
for (TokenFilterFactory filterFactory : filterFactories) {
|
for (TokenFilterFactory filterFactory : filterFactories) {
|
||||||
SimpleOrderedMap<Object> props = new SimpleOrderedMap<>();
|
SimpleOrderedMap<Object> props = new SimpleOrderedMap<>();
|
||||||
|
|
|
@ -175,11 +175,9 @@ public final class FieldTypePluginLoader
|
||||||
MultiTermChainBuilder builder = new MultiTermChainBuilder();
|
MultiTermChainBuilder builder = new MultiTermChainBuilder();
|
||||||
|
|
||||||
CharFilterFactory[] charFactories = tc.getCharFilterFactories();
|
CharFilterFactory[] charFactories = tc.getCharFilterFactories();
|
||||||
if (charFactories != null) {
|
|
||||||
for (CharFilterFactory fact : charFactories) {
|
for (CharFilterFactory fact : charFactories) {
|
||||||
builder.add(fact);
|
builder.add(fact);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
builder.add(tc.getTokenizerFactory());
|
builder.add(tc.getTokenizerFactory());
|
||||||
|
|
||||||
|
|
|
@ -1285,7 +1285,6 @@ public final class ManagedIndexSchema extends IndexSchema {
|
||||||
*/
|
*/
|
||||||
protected void informResourceLoaderAwareObjectsInChain(TokenizerChain chain) {
|
protected void informResourceLoaderAwareObjectsInChain(TokenizerChain chain) {
|
||||||
CharFilterFactory[] charFilters = chain.getCharFilterFactories();
|
CharFilterFactory[] charFilters = chain.getCharFilterFactories();
|
||||||
if (charFilters != null) {
|
|
||||||
for (CharFilterFactory next : charFilters) {
|
for (CharFilterFactory next : charFilters) {
|
||||||
if (next instanceof ResourceLoaderAware) {
|
if (next instanceof ResourceLoaderAware) {
|
||||||
try {
|
try {
|
||||||
|
@ -1295,10 +1294,9 @@ public final class ManagedIndexSchema extends IndexSchema {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
TokenizerFactory tokenizerFactory = chain.getTokenizerFactory();
|
TokenizerFactory tokenizerFactory = chain.getTokenizerFactory();
|
||||||
if (tokenizerFactory != null && tokenizerFactory instanceof ResourceLoaderAware) {
|
if (tokenizerFactory instanceof ResourceLoaderAware) {
|
||||||
try {
|
try {
|
||||||
((ResourceLoaderAware) tokenizerFactory).inform(loader);
|
((ResourceLoaderAware) tokenizerFactory).inform(loader);
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
|
@ -1307,7 +1305,6 @@ public final class ManagedIndexSchema extends IndexSchema {
|
||||||
}
|
}
|
||||||
|
|
||||||
TokenFilterFactory[] filters = chain.getTokenFilterFactories();
|
TokenFilterFactory[] filters = chain.getTokenFilterFactories();
|
||||||
if (filters != null) {
|
|
||||||
for (TokenFilterFactory next : filters) {
|
for (TokenFilterFactory next : filters) {
|
||||||
if (next instanceof ResourceLoaderAware) {
|
if (next instanceof ResourceLoaderAware) {
|
||||||
try {
|
try {
|
||||||
|
@ -1318,7 +1315,6 @@ public final class ManagedIndexSchema extends IndexSchema {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
private ManagedIndexSchema(final SolrConfig solrConfig, final SolrResourceLoader loader, boolean isMutable,
|
private ManagedIndexSchema(final SolrConfig solrConfig, final SolrResourceLoader loader, boolean isMutable,
|
||||||
String managedSchemaResourceName, int schemaZkVersion, Object schemaUpdateLock)
|
String managedSchemaResourceName, int schemaZkVersion, Object schemaUpdateLock)
|
||||||
|
|
|
@ -0,0 +1,27 @@
|
||||||
|
<?xml version="1.0" ?>
|
||||||
|
<!--
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
(the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
-->
|
||||||
|
|
||||||
|
<schema name="test" version="1.4">
|
||||||
|
<fieldType name="string" class="solr.StrField"/>
|
||||||
|
|
||||||
|
<!-- field type with custom TokenizerChain analyzer (created with 2-arg constructor, see SOLR-7765) -->
|
||||||
|
<fieldType name="custom_tc_string" class="solr.CustomAnalyzerStrField"/>
|
||||||
|
|
||||||
|
|
||||||
|
<dynamicField name="*" type="string" indexed="true" stored="true" />
|
||||||
|
</schema>
|
|
@ -20,6 +20,7 @@ package org.apache.solr.handler.admin;
|
||||||
import org.apache.solr.common.luke.FieldFlag;
|
import org.apache.solr.common.luke.FieldFlag;
|
||||||
import org.apache.solr.request.SolrQueryRequest;
|
import org.apache.solr.request.SolrQueryRequest;
|
||||||
import org.apache.solr.schema.IndexSchema;
|
import org.apache.solr.schema.IndexSchema;
|
||||||
|
import org.apache.solr.schema.CustomAnalyzerStrField; // jdoc
|
||||||
import org.apache.solr.util.AbstractSolrTestCase;
|
import org.apache.solr.util.AbstractSolrTestCase;
|
||||||
import org.apache.solr.util.TestHarness;
|
import org.apache.solr.util.TestHarness;
|
||||||
import org.junit.Before;
|
import org.junit.Before;
|
||||||
|
@ -198,6 +199,27 @@ public class LukeRequestHandlerTest extends AbstractSolrTestCase {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** @see CustomAnalyzerStrField */
|
||||||
|
public void testNullFactories() throws Exception {
|
||||||
|
deleteCore();
|
||||||
|
initCore("solrconfig.xml", "schema-null-charfilters-analyzer.xml");
|
||||||
|
|
||||||
|
try {
|
||||||
|
assertQ(req("qt", "/admin/luke", "show", "schema")
|
||||||
|
, "//lst[@name='custom_tc_string']/lst[@name='indexAnalyzer']"
|
||||||
|
, "//lst[@name='custom_tc_string']/lst[@name='queryAnalyzer']"
|
||||||
|
, "0=count(//lst[@name='custom_tc_string']/lst[@name='indexAnalyzer']/lst[@name='filters'])"
|
||||||
|
, "0=count(//lst[@name='custom_tc_string']/lst[@name='queryAnalyzer']/lst[@name='filters'])"
|
||||||
|
, "0=count(//lst[@name='custom_tc_string']/lst[@name='indexAnalyzer']/lst[@name='charFilters'])"
|
||||||
|
, "0=count(//lst[@name='custom_tc_string']/lst[@name='queryAnalyzer']/lst[@name='charFilters'])"
|
||||||
|
);
|
||||||
|
} finally {
|
||||||
|
// Put back the configuration expected by the rest of the tests in this suite
|
||||||
|
deleteCore();
|
||||||
|
initCore("solrconfig.xml", "schema12.xml");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public void testCopyFieldLists() throws Exception {
|
public void testCopyFieldLists() throws Exception {
|
||||||
SolrQueryRequest req = req("qt", "/admin/luke", "show", "schema");
|
SolrQueryRequest req = req("qt", "/admin/luke", "show", "schema");
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,74 @@
|
||||||
|
package org.apache.solr.schema;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.Random;
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
|
import org.apache.lucene.analysis.core.KeywordTokenizerFactory;
|
||||||
|
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||||
|
import org.apache.lucene.analysis.util.CharFilterFactory;
|
||||||
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
|
|
||||||
|
import org.apache.solr.analysis.TokenizerChain;
|
||||||
|
import org.apache.solr.handler.admin.LukeRequestHandlerTest; // jdoc
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A Test only custom FieldType that specifies null for various params when constructing
|
||||||
|
* TokenizerChain instances to ensure that they are still well behaved.
|
||||||
|
*
|
||||||
|
* @see LukeRequestHandlerTest#testNullFactories
|
||||||
|
*/
|
||||||
|
public class CustomAnalyzerStrField extends StrField {
|
||||||
|
private final Analyzer indexAnalyzer;
|
||||||
|
private final Analyzer queryAnalyzer;
|
||||||
|
|
||||||
|
public CustomAnalyzerStrField() {
|
||||||
|
Random r = LuceneTestCase.random();
|
||||||
|
|
||||||
|
// two arg constructor
|
||||||
|
Analyzer a2 = new TokenizerChain
|
||||||
|
(new KeywordTokenizerFactory(new HashMap<>()),
|
||||||
|
r.nextBoolean() ? null : new TokenFilterFactory[0]);
|
||||||
|
|
||||||
|
// three arg constructor
|
||||||
|
Analyzer a3 = new TokenizerChain
|
||||||
|
(r.nextBoolean() ? null : new CharFilterFactory[0],
|
||||||
|
new KeywordTokenizerFactory(new HashMap<>()),
|
||||||
|
r.nextBoolean() ? null : new TokenFilterFactory[0]);
|
||||||
|
|
||||||
|
if (r.nextBoolean()) {
|
||||||
|
indexAnalyzer = a2;
|
||||||
|
queryAnalyzer = a3;
|
||||||
|
} else {
|
||||||
|
queryAnalyzer = a2;
|
||||||
|
indexAnalyzer = a3;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Analyzer getIndexAnalyzer() {
|
||||||
|
return indexAnalyzer;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Analyzer getQueryAnalyzer() {
|
||||||
|
return queryAnalyzer;
|
||||||
|
}
|
||||||
|
}
|
|
@ -88,7 +88,7 @@ public class MultiTermTest extends SolrTestCaseJ4 {
|
||||||
assertTrue((factory instanceof ASCIIFoldingFilterFactory) || (factory instanceof LowerCaseFilterFactory));
|
assertTrue((factory instanceof ASCIIFoldingFilterFactory) || (factory instanceof LowerCaseFilterFactory));
|
||||||
}
|
}
|
||||||
|
|
||||||
assertTrue(tc.getCharFilterFactories() == null);
|
assertTrue(tc.getCharFilterFactories().length == 0);
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue