LUCENE-7318: Forward port some changes (add StopFilter and LowercaseFilter at their original location)

2016-09-12 19:47:28 +02:00 · 2016-09-12 19:47:28 +02:00 · b39fcc1202
parent 86e4af60f0
commit b39fcc1202
13 changed files with 116 additions and 16 deletions
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@ -56,6 +56,11 @@ Bug Fixes
 * LUCENE-7442: MinHashFilter's ctor should validate its args.
  (Cao Manh Dat via Steve Rowe)

+* LUCENE-7318: Fix backwards compatibility issues around StandardAnalyzer
+  and its components, introduced with Lucene 6.2.0. The moved classes
+  were restored in their original packages: LowercaseFilter and StopFilter,
+  as well as several utility classes.  (Uwe Schindler, Mike McCandless)
+
 Improvements

 Optimizations
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilter.java
@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.analysis.core;
+
+
+import org.apache.lucene.analysis.TokenStream;
+
+/**
+ * Normalizes token text to lower case.
+ * <p>
+ * This class moved to Lucene Core, but a reference in the {@code analysis/common} module
+ * is preserved for documentation purposes and consistency with filter factory.
+ * @see org.apache.lucene.analysis.LowerCaseFilter
+ * @see LowerCaseFilterFactory
+ */
+public final class LowerCaseFilter extends org.apache.lucene.analysis.LowerCaseFilter {
+  
+  /**
+   * Create a new LowerCaseFilter, that normalizes token text to lower case.
+   * 
+   * @param in TokenStream to filter
+   */
+  public LowerCaseFilter(TokenStream in) {
+    super(in);
+  }
+  
+}
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilterFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilterFactory.java
@ -20,7 +20,6 @@ package org.apache.lucene.analysis.core;
 import java.util.Map;

 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.LowerCaseFilter;
 import org.apache.lucene.analysis.util.AbstractAnalysisFactory;
 import org.apache.lucene.analysis.util.MultiTermAwareComponent;
 import org.apache.lucene.analysis.util.TokenFilterFactory;
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopFilter.java
@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.analysis.core;
+
+
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.TokenStream;
+
+/**
+ * Removes stop words from a token stream.
+ * <p>
+ * This class moved to Lucene Core, but a reference in the {@code analysis/common} module
+ * is preserved for documentation purposes and consistency with filter factory.
+ * @see org.apache.lucene.analysis.StopFilter
+ * @see StopFilterFactory
+ */
+public final class StopFilter extends org.apache.lucene.analysis.StopFilter {
+
+  /**
+   * Constructs a filter which removes words from the input TokenStream that are
+   * named in the Set.
+   * 
+   * @param in
+   *          Input stream
+   * @param stopWords
+   *          A {@link CharArraySet} representing the stopwords.
+   * @see #makeStopSet(java.lang.String...)
+   */
+  public StopFilter(TokenStream in, CharArraySet stopWords) {
+    super(in, stopWords);
+  }
+
+}
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopFilterFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopFilterFactory.java
@ -21,7 +21,6 @@ import java.io.IOException;
 import java.util.Map;

 import org.apache.lucene.analysis.CharArraySet;
-import org.apache.lucene.analysis.StopFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.WordlistLoader; // jdocs
 import org.apache.lucene.analysis.util.ResourceLoader;
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/package.html
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/package.html
@ -46,5 +46,9 @@
         and {@link org.apache.lucene.analysis.StopFilter StopFilter}.
     </li>
 </ul>
+ <p>
+ This Java package additionally contains {@code StandardAnalyzer}, {@code StandardTokenizer},
+ and {@code StandardFilter}, which are not visible here, because they moved to Lucene Core.
+ The factories for those components (e.g., used in Solr) are still part of this module.
 </body>
 </html>
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAllAnalyzersHaveFactories.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAllAnalyzersHaveFactories.java
@ -106,7 +106,9 @@ public class TestAllAnalyzersHaveFactories extends LuceneTestCase {
      SnowballFilter.class, // this is called SnowballPorterFilterFactory
      PatternKeywordMarkerFilter.class,
      SetKeywordMarkerFilter.class,
-      UnicodeWhitespaceTokenizer.class // a supported option via WhitespaceTokenizerFactory
+      UnicodeWhitespaceTokenizer.class, // a supported option via WhitespaceTokenizerFactory
+      org.apache.lucene.analysis.StopFilter.class, // class from core, but StopFilterFactory creates one from this module
+      org.apache.lucene.analysis.LowerCaseFilter.class // class from core, but LowerCaseFilterFactory creates one from this module
    );
  }

--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java
@ -166,7 +166,10 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
          // also randomly pick it:
          ValidatingTokenFilter.class, 
          // TODO: needs to be a tokenizer, doesnt handle graph inputs properly (a shingle or similar following will then cause pain)
-          WordDelimiterFilter.class)) {
+          WordDelimiterFilter.class,
+          // clones of core's filters:
+          org.apache.lucene.analysis.core.StopFilter.class,
+          org.apache.lucene.analysis.core.LowerCaseFilter.class)) {
        for (Constructor<?> ctor : c.getConstructors()) {
          brokenConstructors.put(ctor, ALWAYS);
        }
--- a/lucene/core/src/java/org/apache/lucene/analysis/LowerCaseFilter.java
+++ b/lucene/core/src/java/org/apache/lucene/analysis/LowerCaseFilter.java
@ -27,7 +27,7 @@ import org.apache.lucene.analysis.CharacterUtils;
 /**
 * Normalizes token text to lower case.
 */
-public final class LowerCaseFilter extends TokenFilter {
+public class LowerCaseFilter extends TokenFilter {
  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
  
  /**
--- a/lucene/core/src/java/org/apache/lucene/analysis/StopFilter.java
+++ b/lucene/core/src/java/org/apache/lucene/analysis/StopFilter.java
@ -28,7 +28,7 @@ import org.apache.lucene.analysis.CharArraySet;
 /**
 * Removes stop words from a token stream.
 */
-public final class StopFilter extends FilteringTokenFilter {
+public class StopFilter extends FilteringTokenFilter {

  private final CharArraySet stopWords;
  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
--- a/solr/core/src/java/org/apache/solr/rest/schema/analysis/ManagedStopFilterFactory.java
+++ b/solr/core/src/java/org/apache/solr/rest/schema/analysis/ManagedStopFilterFactory.java
@ -19,7 +19,7 @@ import java.util.Map;
 import java.util.Set;

 import org.apache.lucene.analysis.CharArraySet;
-import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.core.StopFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.util.NamedList;
--- a/solr/core/src/test/org/apache/solr/handler/DocumentAnalysisRequestHandlerTest.java
+++ b/solr/core/src/test/org/apache/solr/handler/DocumentAnalysisRequestHandlerTest.java
@ -278,11 +278,11 @@ public class DocumentAnalysisRequestHandlerTest extends AnalysisRequestHandlerTe
    assertNotNull("Expecting the 'StandardFilter' to be applied on the query for the 'text' field", tokenList);
    assertEquals("Query has only one token", 1, tokenList.size());
    assertToken(tokenList.get(0), new TokenInfo("JUMPING", null, "<ALPHANUM>", 0, 7, 1, new int[]{1,1}, null, false));
-    tokenList = (List<NamedList>) queryResult.get("org.apache.lucene.analysis.LowerCaseFilter");
+    tokenList = (List<NamedList>) queryResult.get("org.apache.lucene.analysis.core.LowerCaseFilter");
    assertNotNull("Expecting the 'LowerCaseFilter' to be applied on the query for the 'text' field", tokenList);
    assertEquals("Query has only one token", 1, tokenList.size());
    assertToken(tokenList.get(0), new TokenInfo("jumping", null, "<ALPHANUM>", 0, 7, 1, new int[]{1,1,1}, null, false));
-    tokenList = (List<NamedList>) queryResult.get("org.apache.lucene.analysis.StopFilter");
+    tokenList = (List<NamedList>) queryResult.get("org.apache.lucene.analysis.core.StopFilter");
    assertNotNull("Expecting the 'StopFilter' to be applied on the query for the 'text' field", tokenList);
    assertEquals("Query has only one token", 1, tokenList.size());
    assertToken(tokenList.get(0), new TokenInfo("jumping", null, "<ALPHANUM>", 0, 7, 1, new int[]{1,1,1,1}, null, false));
@ -311,7 +311,7 @@ public class DocumentAnalysisRequestHandlerTest extends AnalysisRequestHandlerTe
    assertToken(tokenList.get(3), new TokenInfo("Over", null, "<ALPHANUM>", 15, 19, 4, new int[]{4,4}, null, false));
    assertToken(tokenList.get(4), new TokenInfo("The", null, "<ALPHANUM>", 20, 23, 5, new int[]{5,5}, null, false));
    assertToken(tokenList.get(5), new TokenInfo("Dogs", null, "<ALPHANUM>", 24, 28, 6, new int[]{6,6}, null, false));
-    tokenList = valueResult.get("org.apache.lucene.analysis.LowerCaseFilter");
+    tokenList = valueResult.get("org.apache.lucene.analysis.core.LowerCaseFilter");
    assertNotNull("Expecting the 'LowerCaseFilter' to be applied on the index for the 'text' field", tokenList);
    assertEquals("Expecting 6 tokens", 6, tokenList.size());
    assertToken(tokenList.get(0), new TokenInfo("the", null, "<ALPHANUM>", 0, 3, 1, new int[]{1,1,1}, null, false));
@ -320,7 +320,7 @@ public class DocumentAnalysisRequestHandlerTest extends AnalysisRequestHandlerTe
    assertToken(tokenList.get(3), new TokenInfo("over", null, "<ALPHANUM>", 15, 19, 4, new int[]{4,4,4}, null, false));
    assertToken(tokenList.get(4), new TokenInfo("the", null, "<ALPHANUM>", 20, 23, 5, new int[]{5,5,5}, null, false));
    assertToken(tokenList.get(5), new TokenInfo("dogs", null, "<ALPHANUM>", 24, 28, 6, new int[]{6,6,6}, null, false));
-    tokenList = valueResult.get("org.apache.lucene.analysis.StopFilter");
+    tokenList = valueResult.get("org.apache.lucene.analysis.core.StopFilter");
    assertNotNull("Expecting the 'StopFilter' to be applied on the index for the 'text' field", tokenList);
    assertEquals("Expecting 4 tokens after stop word removal", 4, tokenList.size());
    assertToken(tokenList.get(0), new TokenInfo("fox", null, "<ALPHANUM>", 4, 7, 2, new int[]{2,2,2,2}, null, false));
--- a/solr/core/src/test/org/apache/solr/handler/FieldAnalysisRequestHandlerTest.java
+++ b/solr/core/src/test/org/apache/solr/handler/FieldAnalysisRequestHandlerTest.java
@ -209,7 +209,7 @@ public class FieldAnalysisRequestHandlerTest extends AnalysisRequestHandlerTestB
    assertToken(tokenList.get(7), new TokenInfo("lazy", null, "<ALPHANUM>", 34, 38, 8, new int[]{8,8}, null, false));
    assertToken(tokenList.get(8), new TokenInfo("brown", null, "<ALPHANUM>", 39, 44, 9, new int[]{9,9}, null, true));
    assertToken(tokenList.get(9), new TokenInfo("dogs", null, "<ALPHANUM>", 45, 49, 10, new int[]{10,10}, null, false));
-    tokenList = indexPart.get("org.apache.lucene.analysis.LowerCaseFilter");
+    tokenList = indexPart.get("org.apache.lucene.analysis.core.LowerCaseFilter");
    assertNotNull("Expcting LowerCaseFilter analysis breakdown", tokenList);
    assertEquals(tokenList.size(), 10);
    assertToken(tokenList.get(0), new TokenInfo("the", null, "<ALPHANUM>", 0, 3, 1, new int[]{1,1,1}, null, false));
@ -222,7 +222,7 @@ public class FieldAnalysisRequestHandlerTest extends AnalysisRequestHandlerTestB
    assertToken(tokenList.get(7), new TokenInfo("lazy", null, "<ALPHANUM>", 34, 38, 8, new int[]{8,8,8}, null, false));
    assertToken(tokenList.get(8), new TokenInfo("brown", null, "<ALPHANUM>", 39, 44, 9, new int[]{9,9,9}, null, true));
    assertToken(tokenList.get(9), new TokenInfo("dogs", null, "<ALPHANUM>", 45, 49, 10, new int[]{10,10,10}, null, false));
-    tokenList = indexPart.get("org.apache.lucene.analysis.StopFilter");
+    tokenList = indexPart.get("org.apache.lucene.analysis.core.StopFilter");
    assertNotNull("Expcting StopFilter analysis breakdown", tokenList);
    assertEquals(tokenList.size(), 8);
    assertToken(tokenList.get(0), new TokenInfo("quick", null, "<ALPHANUM>", 4, 9, 2, new int[]{2,2,2,2}, null, false));
@ -258,12 +258,12 @@ public class FieldAnalysisRequestHandlerTest extends AnalysisRequestHandlerTestB
    assertEquals(2, tokenList.size());
    assertToken(tokenList.get(0), new TokenInfo("fox", null, "<ALPHANUM>", 0, 3, 1, new int[]{1,1}, null, false));
    assertToken(tokenList.get(1), new TokenInfo("brown", null, "<ALPHANUM>", 4, 9, 2, new int[]{2,2}, null, false));
-    tokenList = queryPart.get("org.apache.lucene.analysis.LowerCaseFilter");
+    tokenList = queryPart.get("org.apache.lucene.analysis.core.LowerCaseFilter");
    assertNotNull("Expcting LowerCaseFilter analysis breakdown", tokenList);
    assertEquals(2, tokenList.size());
    assertToken(tokenList.get(0), new TokenInfo("fox", null, "<ALPHANUM>", 0, 3, 1, new int[]{1,1,1}, null, false));
    assertToken(tokenList.get(1), new TokenInfo("brown", null, "<ALPHANUM>", 4, 9, 2, new int[]{2,2,2}, null, false));
-    tokenList = queryPart.get("org.apache.lucene.analysis.StopFilter");
+    tokenList = queryPart.get("org.apache.lucene.analysis.core.StopFilter");
    assertNotNull("Expcting StopFilter analysis breakdown", tokenList);
    assertEquals(2, tokenList.size());
    assertToken(tokenList.get(0), new TokenInfo("fox", null, "<ALPHANUM>", 0, 3, 1, new int[]{1,1,1,1}, null, false));
@ -416,7 +416,7 @@ public class FieldAnalysisRequestHandlerTest extends AnalysisRequestHandlerTestB
    assertToken(tokenList.get(3), new TokenInfo("12", null, "word", 9, 11, 3, new int[]{2,3}, null, false));
    assertToken(tokenList.get(4), new TokenInfo("a", null, "word", 12, 13, 4, new int[]{3,4}, null, false));
    assertToken(tokenList.get(5), new TokenInfo("Test", null, "word", 14, 18, 5, new int[]{4,5}, null, false));
-    tokenList = indexPart.get("org.apache.lucene.analysis.LowerCaseFilter");
+    tokenList = indexPart.get("org.apache.lucene.analysis.core.LowerCaseFilter");
    assertNotNull("Expcting LowerCaseFilter analysis breakdown", tokenList);
    assertEquals(6, tokenList.size());
    assertToken(tokenList.get(0), new TokenInfo("hi", null, "word", 0, 2, 1, new int[]{1,1,1}, null, false));