SOLR-7765: Hardened the behavior of TokenizerChain when null arguments are used in constructor. This prevents NPEs in some code paths.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1692170 13f79535-47bb-0310-9956-ffa450edef68
Chris M. Hostetter 2015-07-21 16:49:37 +00:00
parent 585f8a7b26
commit 9e61daf0fa
11 changed files with 189 additions and 44 deletions

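For orientation before the per-file diffs, here is a minimal sketch (not part of the commit) of the contract this change establishes: null filter arrays passed to the TokenizerChain constructors are treated as empty, while a null TokenizerFactory fails fast. The demo class name is hypothetical; the constructors and getters are the ones patched below, and the KeywordTokenizerFactory usage mirrors the commit's own test code.

import java.util.HashMap;

import org.apache.lucene.analysis.core.KeywordTokenizerFactory;
import org.apache.solr.analysis.TokenizerChain;

public class TokenizerChainNullDemo {
  public static void main(String[] args) {
    // Null filter arrays are treated as if empty, so the getters never return null.
    TokenizerChain chain = new TokenizerChain(
        null,                                          // char filters: treated as empty
        new KeywordTokenizerFactory(new HashMap<>()),  // tokenizer: required
        null);                                         // token filters: treated as empty
    System.out.println(chain.getCharFilterFactories().length);  // prints 0
    System.out.println(chain.getTokenFilterFactories().length); // prints 0

    // A null TokenizerFactory now fails fast instead of causing a later NPE.
    try {
      new TokenizerChain(null, null); // two-arg form: (tokenizer, filters)
    } catch (NullPointerException expected) {
      System.out.println(expected.getMessage()); // "TokenizerFactory must not be null"
    }
  }
}
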
View File

@@ -239,6 +239,9 @@ Bug Fixes

* SOLR-7810: map-reduce contrib script to set classpath for convenience refers to example
  rather than server. (Mark Miller)

+* SOLR-7765: Hardened the behavior of TokenizerChain when null arguments are used in constructor.
+  This prevents NPEs in some code paths. (Konstantin Gribov, hossman)
+
Optimizations
----------------------

View File

@@ -29,22 +29,47 @@ import java.io.Reader;
 * create a TokenStream.
 */
public final class TokenizerChain extends SolrAnalyzer {
+  private static final CharFilterFactory[] EMPTY_CHAR_FILTERS = new CharFilterFactory[0];
+  private static final TokenFilterFactory[] EMPTY_TOKEN_FILTERS = new TokenFilterFactory[0];

  final private CharFilterFactory[] charFilters;
  final private TokenizerFactory tokenizer;
  final private TokenFilterFactory[] filters;

+  /**
+   * Creates a new TokenizerChain w/o any CharFilterFactories.
+   *
+   * @param tokenizer Factory for the Tokenizer to use, must not be null.
+   * @param filters Factories for the TokenFilters to use - if null, will be treated as if empty.
+   */
  public TokenizerChain(TokenizerFactory tokenizer, TokenFilterFactory[] filters) {
    this(null, tokenizer, filters);
  }

+  /**
+   * Creates a new TokenizerChain.
+   *
+   * @param charFilters Factories for the CharFilters to use, if any - if null, will be treated as if empty.
+   * @param tokenizer Factory for the Tokenizer to use, must not be null.
+   * @param filters Factories for the TokenFilters to use, if any - if null, will be treated as if empty.
+   */
  public TokenizerChain(CharFilterFactory[] charFilters, TokenizerFactory tokenizer, TokenFilterFactory[] filters) {
+    charFilters = null == charFilters ? EMPTY_CHAR_FILTERS : charFilters;
+    filters = null == filters ? EMPTY_TOKEN_FILTERS : filters;
+    if (null == tokenizer) {
+      throw new NullPointerException("TokenizerFactory must not be null");
+    }
    this.charFilters = charFilters;
    this.tokenizer = tokenizer;
    this.filters = filters;
  }

+  /** @return array of CharFilterFactories, may be empty but never null */
  public CharFilterFactory[] getCharFilterFactories() { return charFilters; }
+  /** @return the TokenizerFactory in use, will never be null */
  public TokenizerFactory getTokenizerFactory() { return tokenizer; }
+  /** @return array of TokenFilterFactories, may be empty but never null */
  public TokenFilterFactory[] getTokenFilterFactories() { return filters; }

  @Override

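The non-null guarantees documented on the getters above are what allow the callers patched in the remaining files to drop their null guards. A hypothetical helper, sketched here for illustration only, can now iterate the factory arrays directly:

import org.apache.lucene.analysis.util.CharFilterFactory;
import org.apache.lucene.analysis.util.TokenFilterFactory;
import org.apache.solr.analysis.TokenizerChain;

// Hypothetical helper class, for illustration only.
public class ChainPrinter {
  // With the hardened contract, no null checks are needed before iterating.
  static void printChain(TokenizerChain tc) {
    for (CharFilterFactory cf : tc.getCharFilterFactories()) {   // never null
      System.out.println("charFilter: " + cf.getClass().getSimpleName());
    }
    System.out.println("tokenizer: " + tc.getTokenizerFactory().getClass().getSimpleName());
    for (TokenFilterFactory tf : tc.getTokenFilterFactories()) { // never null
      System.out.println("filter: " + tf.getClass().getSimpleName());
    }
  }
}
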
View File

@@ -104,7 +104,7 @@ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase {
NamedList<Object> namedList = new NamedList<>();

-if( cfiltfacs != null ){
+if (0 < cfiltfacs.length) {
  String source = value;
  for(CharFilterFactory cfiltfac : cfiltfacs ){
    Reader reader = new StringReader(source);

View File

@@ -497,15 +497,15 @@ public class LukeRequestHandler extends RequestHandlerBase
TokenizerChain tchain = (TokenizerChain)analyzer;

CharFilterFactory[] cfiltfacs = tchain.getCharFilterFactories();
-SimpleOrderedMap<Map<String, Object>> cfilters = new SimpleOrderedMap<>();
-for (CharFilterFactory cfiltfac : cfiltfacs) {
-  Map<String, Object> tok = new HashMap<>();
-  String className = cfiltfac.getClass().getName();
-  tok.put("className", className);
-  tok.put("args", cfiltfac.getOriginalArgs());
-  cfilters.add(className.substring(className.lastIndexOf('.')+1), tok);
-}
-if (cfilters.size() > 0) {
+if (0 < cfiltfacs.length) {
+  SimpleOrderedMap<Map<String, Object>> cfilters = new SimpleOrderedMap<>();
+  for (CharFilterFactory cfiltfac : cfiltfacs) {
+    Map<String, Object> tok = new HashMap<>();
+    String className = cfiltfac.getClass().getName();
+    tok.put("className", className);
+    tok.put("args", cfiltfac.getOriginalArgs());
+    cfilters.add(className.substring(className.lastIndexOf('.')+1), tok);
+  }
  aninfo.add("charFilters", cfilters);
}
@@ -516,15 +516,15 @@ public class LukeRequestHandler extends RequestHandlerBase
aninfo.add("tokenizer", tokenizer);

TokenFilterFactory[] filtfacs = tchain.getTokenFilterFactories();
-SimpleOrderedMap<Map<String, Object>> filters = new SimpleOrderedMap<>();
-for (TokenFilterFactory filtfac : filtfacs) {
-  Map<String, Object> tok = new HashMap<>();
-  String className = filtfac.getClass().getName();
-  tok.put("className", className);
-  tok.put("args", filtfac.getOriginalArgs());
-  filters.add(className.substring(className.lastIndexOf('.')+1), tok);
-}
-if (filters.size() > 0) {
+if (0 < filtfacs.length) {
+  SimpleOrderedMap<Map<String, Object>> filters = new SimpleOrderedMap<>();
+  for (TokenFilterFactory filtfac : filtfacs) {
+    Map<String, Object> tok = new HashMap<>();
+    String className = filtfac.getClass().getName();
+    tok.put("className", className);
+    tok.put("args", filtfac.getOriginalArgs());
+    filters.add(className.substring(className.lastIndexOf('.')+1), tok);
+  }
  aninfo.add("filters", filters);
}
}

View File

@@ -883,7 +883,7 @@ public abstract class FieldType extends FieldProperties {
Map<String,String> factoryArgs;
TokenizerChain tokenizerChain = (TokenizerChain)analyzer;

CharFilterFactory[] charFilterFactories = tokenizerChain.getCharFilterFactories();
-if (null != charFilterFactories && charFilterFactories.length > 0) {
+if (0 < charFilterFactories.length) {
  List<SimpleOrderedMap<Object>> charFilterProps = new ArrayList<>();
  for (CharFilterFactory charFilterFactory : charFilterFactories) {
    SimpleOrderedMap<Object> props = new SimpleOrderedMap<>();
@@ -927,7 +927,7 @@ public abstract class FieldType extends FieldProperties {
analyzerProps.add(TOKENIZER, tokenizerProps);

TokenFilterFactory[] filterFactories = tokenizerChain.getTokenFilterFactories();
-if (null != filterFactories && filterFactories.length > 0) {
+if (0 < filterFactories.length) {
  List<SimpleOrderedMap<Object>> filterProps = new ArrayList<>();
  for (TokenFilterFactory filterFactory : filterFactories) {
    SimpleOrderedMap<Object> props = new SimpleOrderedMap<>();

View File

@@ -175,10 +175,8 @@ public final class FieldTypePluginLoader
MultiTermChainBuilder builder = new MultiTermChainBuilder();

CharFilterFactory[] charFactories = tc.getCharFilterFactories();
-if (charFactories != null) {
-  for (CharFilterFactory fact : charFactories) {
-    builder.add(fact);
-  }
-}
+for (CharFilterFactory fact : charFactories) {
+  builder.add(fact);
+}

builder.add(tc.getTokenizerFactory());

View File

@@ -1285,20 +1285,18 @@ public final class ManagedIndexSchema extends IndexSchema {
 */
protected void informResourceLoaderAwareObjectsInChain(TokenizerChain chain) {
  CharFilterFactory[] charFilters = chain.getCharFilterFactories();
-  if (charFilters != null) {
-    for (CharFilterFactory next : charFilters) {
-      if (next instanceof ResourceLoaderAware) {
-        try {
-          ((ResourceLoaderAware) next).inform(loader);
-        } catch (IOException e) {
-          throw new SolrException(ErrorCode.SERVER_ERROR, e);
-        }
-      }
-    }
-  }
+  for (CharFilterFactory next : charFilters) {
+    if (next instanceof ResourceLoaderAware) {
+      try {
+        ((ResourceLoaderAware) next).inform(loader);
+      } catch (IOException e) {
+        throw new SolrException(ErrorCode.SERVER_ERROR, e);
+      }
+    }
+  }

  TokenizerFactory tokenizerFactory = chain.getTokenizerFactory();
-  if (tokenizerFactory != null && tokenizerFactory instanceof ResourceLoaderAware) {
+  if (tokenizerFactory instanceof ResourceLoaderAware) {
    try {
      ((ResourceLoaderAware) tokenizerFactory).inform(loader);
    } catch (IOException e) {
@@ -1307,14 +1305,12 @@ public final class ManagedIndexSchema extends IndexSchema {
  }

  TokenFilterFactory[] filters = chain.getTokenFilterFactories();
-  if (filters != null) {
-    for (TokenFilterFactory next : filters) {
-      if (next instanceof ResourceLoaderAware) {
-        try {
-          ((ResourceLoaderAware) next).inform(loader);
-        } catch (IOException e) {
-          throw new SolrException(ErrorCode.SERVER_ERROR, e);
-        }
-      }
-    }
-  }
+  for (TokenFilterFactory next : filters) {
+    if (next instanceof ResourceLoaderAware) {
+      try {
+        ((ResourceLoaderAware) next).inform(loader);
+      } catch (IOException e) {
+        throw new SolrException(ErrorCode.SERVER_ERROR, e);
+      }
+    }
+  }

View File

@@ -0,0 +1,27 @@
<?xml version="1.0" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<schema name="test" version="1.4">
<fieldType name="string" class="solr.StrField"/>
<!-- field type with custom TokenizerChain analyzer (created with 2-arg constructor, see SOLR-7765) -->
<fieldType name="custom_tc_string" class="solr.CustomAnalyzerStrField"/>
<dynamicField name="*" type="string" indexed="true" stored="true" />
</schema>

View File

@@ -20,6 +20,7 @@ package org.apache.solr.handler.admin;
import org.apache.solr.common.luke.FieldFlag;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.schema.IndexSchema;
+import org.apache.solr.schema.CustomAnalyzerStrField; // jdoc
import org.apache.solr.util.AbstractSolrTestCase;
import org.apache.solr.util.TestHarness;
import org.junit.Before;
@@ -198,6 +199,27 @@ public class LukeRequestHandlerTest extends AbstractSolrTestCase {
    }
  }

+  /** @see CustomAnalyzerStrField */
+  public void testNullFactories() throws Exception {
+    deleteCore();
+    initCore("solrconfig.xml", "schema-null-charfilters-analyzer.xml");
+    try {
+      assertQ(req("qt", "/admin/luke", "show", "schema")
+              , "//lst[@name='custom_tc_string']/lst[@name='indexAnalyzer']"
+              , "//lst[@name='custom_tc_string']/lst[@name='queryAnalyzer']"
+              , "0=count(//lst[@name='custom_tc_string']/lst[@name='indexAnalyzer']/lst[@name='filters'])"
+              , "0=count(//lst[@name='custom_tc_string']/lst[@name='queryAnalyzer']/lst[@name='filters'])"
+              , "0=count(//lst[@name='custom_tc_string']/lst[@name='indexAnalyzer']/lst[@name='charFilters'])"
+              , "0=count(//lst[@name='custom_tc_string']/lst[@name='queryAnalyzer']/lst[@name='charFilters'])"
+              );
+    } finally {
+      // Put back the configuration expected by the rest of the tests in this suite
+      deleteCore();
+      initCore("solrconfig.xml", "schema12.xml");
+    }
+  }
+
  public void testCopyFieldLists() throws Exception {
    SolrQueryRequest req = req("qt", "/admin/luke", "show", "schema");

View File

@@ -0,0 +1,74 @@
package org.apache.solr.schema;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.util.HashMap;
import java.util.Random;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.KeywordTokenizerFactory;
import org.apache.lucene.analysis.util.TokenFilterFactory;
import org.apache.lucene.analysis.util.CharFilterFactory;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.solr.analysis.TokenizerChain;
import org.apache.solr.handler.admin.LukeRequestHandlerTest; // jdoc

/**
 * A test-only custom FieldType that specifies null for various params when constructing
 * TokenizerChain instances to ensure that they are still well behaved.
 *
 * @see LukeRequestHandlerTest#testNullFactories
 */
public class CustomAnalyzerStrField extends StrField {
  private final Analyzer indexAnalyzer;
  private final Analyzer queryAnalyzer;

  public CustomAnalyzerStrField() {
    Random r = LuceneTestCase.random();

    // two arg constructor
    Analyzer a2 = new TokenizerChain
      (new KeywordTokenizerFactory(new HashMap<>()),
       r.nextBoolean() ? null : new TokenFilterFactory[0]);

    // three arg constructor
    Analyzer a3 = new TokenizerChain
      (r.nextBoolean() ? null : new CharFilterFactory[0],
       new KeywordTokenizerFactory(new HashMap<>()),
       r.nextBoolean() ? null : new TokenFilterFactory[0]);

    if (r.nextBoolean()) {
      indexAnalyzer = a2;
      queryAnalyzer = a3;
    } else {
      queryAnalyzer = a2;
      indexAnalyzer = a3;
    }
  }

  @Override
  public Analyzer getIndexAnalyzer() {
    return indexAnalyzer;
  }

  @Override
  public Analyzer getQueryAnalyzer() {
    return queryAnalyzer;
  }
}

View File

@@ -88,7 +88,7 @@ public class MultiTermTest extends SolrTestCaseJ4 {
  assertTrue((factory instanceof ASCIIFoldingFilterFactory) || (factory instanceof LowerCaseFilterFactory));
}

-assertTrue(tc.getCharFilterFactories() == null);
+assertTrue(tc.getCharFilterFactories().length == 0);
}
}