Merge remote-tracking branch 'origin/master'

This commit is contained in:
Noble Paul 2016-11-18 20:29:42 +05:30
commit 3c4315c566
5 changed files with 75 additions and 7 deletions

View File

@ -76,6 +76,9 @@ Bug Fixes
* LUCENE-7533: Classic query parser: disallow autoGeneratePhraseQueries=true
when splitOnWhitespace=false (and vice-versa). (Steve Rowe)
* LUCENE-7536: ASCIIFoldingFilterFactory used to return an illegal multi-term
component when preserveOriginal was set to true. (Adrien Grand)
Improvements
* LUCENE-6824: TermAutomatonQuery now rewrites to TermQuery,

View File

@ -17,6 +17,7 @@
package org.apache.lucene.analysis.miscellaneous;
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.analysis.util.AbstractAnalysisFactory;
@ -36,12 +37,14 @@ import org.apache.lucene.analysis.TokenStream;
* &lt;/fieldType&gt;</pre>
*/
public class ASCIIFoldingFilterFactory extends TokenFilterFactory implements MultiTermAwareComponent {
private static final String PRESERVE_ORIGINAL = "preserveOriginal";
private final boolean preserveOriginal;
/**
 * Creates a new ASCIIFoldingFilterFactory.
 *
 * @param args factory arguments; the only recognized key is
 *             {@code preserveOriginal} (default {@code false}), which controls
 *             whether the unfolded token is emitted alongside the folded one
 * @throws IllegalArgumentException if any unrecognized argument remains
 */
public ASCIIFoldingFilterFactory(Map<String,String> args) {
  super(args);
  // getBoolean consumes the key from args; the final field is assigned exactly once.
  preserveOriginal = getBoolean(args, PRESERVE_ORIGINAL, false);
  if (!args.isEmpty()) {
    throw new IllegalArgumentException("Unknown parameters: " + args);
  }
}
@ -54,7 +57,17 @@ public class ASCIIFoldingFilterFactory extends TokenFilterFactory implements Mul
@Override
public AbstractAnalysisFactory getMultiTermComponent() {
  if (preserveOriginal) {
    // The main use-case for using preserveOriginal is to match regardless of
    // case but to give better scores to exact matches. Since most multi-term
    // queries return constant scores anyway, the multi-term component only
    // emits the folded token: return a copy of this factory with
    // preserveOriginal removed so it stays a legal multi-term component.
    Map<String, String> args = new HashMap<>(getOriginalArgs());
    args.remove(PRESERVE_ORIGINAL);
    return new ASCIIFoldingFilterFactory(args);
  } else {
    // Without preserveOriginal this factory is already single-output; reuse it.
    return this;
  }
}
}

View File

@ -0,0 +1,54 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.analysis.miscellaneous;
import java.io.IOException;
import java.util.Collections;
import java.util.HashMap;
import org.apache.lucene.analysis.CannedTokenStream;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
import org.apache.lucene.analysis.util.MultiTermAwareComponent;
import org.apache.lucene.analysis.util.TokenFilterFactory;
public class TestAsciiFoldingFilterFactory extends BaseTokenStreamFactoryTestCase {

  /**
   * Runs {@code text} through the given factory as a single canned token and
   * asserts that exactly the {@code expected} terms are produced.
   */
  private void assertFolds(TokenFilterFactory factory, String text, String... expected) throws IOException {
    TokenStream stream = factory.create(new CannedTokenStream(new Token(text, 0, text.length())));
    assertTokenStreamContents(stream, expected);
  }

  /**
   * Verifies index-time vs. multi-term behavior: without preserveOriginal both
   * components emit only the folded token; with preserveOriginal the index-time
   * component emits both forms while the multi-term component still emits only
   * the folded token.
   */
  public void testMultiTermAnalysis() throws IOException {
    TokenFilterFactory factory = new ASCIIFoldingFilterFactory(Collections.emptyMap());
    assertFolds(factory, "Été", "Ete");

    factory = (TokenFilterFactory) ((MultiTermAwareComponent) factory).getMultiTermComponent();
    assertFolds(factory, "Été", "Ete");

    factory = new ASCIIFoldingFilterFactory(new HashMap<>(Collections.singletonMap("preserveOriginal", "true")));
    assertFolds(factory, "Été", "Ete", "Été");

    factory = (TokenFilterFactory) ((MultiTermAwareComponent) factory).getMultiTermComponent();
    assertFolds(factory, "Été", "Ete");
  }
}

View File

@ -56,7 +56,7 @@ public abstract class PrimaryNode extends Node {
// Current NRT segment infos, incRef'd with IndexWriter.deleter:
private SegmentInfos curInfos;
final IndexWriter writer;
protected final IndexWriter writer;
// IncRef'd state of the last published NRT point; when a replica comes asking, we give it this as the current NRT point:
private CopyState copyState;

View File

@ -2645,16 +2645,14 @@ public final class SolrCore implements SolrInfoMBean, Closeable {
try {
FileUtils.deleteDirectory(dataDir);
} catch (IOException e) {
SolrException.log(log, "Failed to delete data dir for unloaded core:" + cd.getName()
+ " dir:" + dataDir.getAbsolutePath());
log.error("Failed to delete data dir for unloaded core: {} dir: {}", cd.getName(), dataDir.getAbsolutePath(), e);
}
}
if (deleteInstanceDir) {
try {
FileUtils.deleteDirectory(cd.getInstanceDir().toFile());
} catch (IOException e) {
SolrException.log(log, "Failed to delete instance dir for unloaded core:" + cd.getName()
+ " dir:" + cd.getInstanceDir());
log.error("Failed to delete instance dir for unloaded core: {} dir: {}", cd.getName(), cd.getInstanceDir(), e);
}
}
}