LUCENE-4044: port over synfilter

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene2510@1364907 13f79535-47bb-0310-9956-ffa450edef68
2025-02-23 10:51:29 +00:00 · 2012-07-24 06:18:49 +00:00 · 2012-07-24 06:18:49 +00:00 · 5249e46aee
commit 5249e46aee
parent dfe9a8444a
5 changed files with 43 additions and 43 deletions
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilterFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilterFactory.java
@ -1,4 +1,4 @@
-package org.apache.solr.analysis;
+package org.apache.lucene.analysis.synonym;
 /*
 * Licensed to the Apache Software Foundation (ASF) under one or more
@ -38,9 +38,6 @@ import org.apache.lucene.analysis.synonym.SolrSynonymParser;
 import org.apache.lucene.analysis.synonym.WordnetSynonymParser;
 import org.apache.lucene.analysis.util.*;
 import org.apache.lucene.util.Version;
 import org.apache.solr.common.util.StrUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 /**
 * Factory for {@link SynonymFilter}.
@ -55,9 +52,6 @@ import org.slf4j.LoggerFactory;
 * &lt;/fieldType&gt;</pre>
 */
 public class SynonymFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
  public static final Logger log = LoggerFactory.getLogger(SynonymFilterFactory.class);
  private SynonymMap map;
  private boolean ignoreCase;
@ -100,10 +94,6 @@ public class SynonymFilterFactory extends TokenFilterFactory implements Resource
    } catch (Exception e) {
      throw new InitializationException("Exception thrown while loading synonyms", e);
    }
    if (map.fst == null) {
      log.warn("Synonyms loaded with " + args + " has empty rule set!");
    }
  }
  /**
@ -125,7 +115,7 @@ public class SynonymFilterFactory extends TokenFilterFactory implements Resource
      decoder.reset();
      parser.add(new InputStreamReader(loader.openResource(synonyms), decoder));
    } else {
-      List<String> files = StrUtils.splitFileNames(synonyms);
+      List<String> files = splitFileNames(synonyms);
      for (String file : files) {
        decoder.reset();
        parser.add(new InputStreamReader(loader.openResource(file), decoder));
@ -153,7 +143,7 @@ public class SynonymFilterFactory extends TokenFilterFactory implements Resource
      decoder.reset();
      parser.add(new InputStreamReader(loader.openResource(synonyms), decoder));
    } else {
-      List<String> files = StrUtils.splitFileNames(synonyms);
+      List<String> files = splitFileNames(synonyms);
      for (String file : files) {
        decoder.reset();
        parser.add(new InputStreamReader(loader.openResource(file), decoder));
@ -162,6 +152,8 @@ public class SynonymFilterFactory extends TokenFilterFactory implements Resource
    return parser.build();
  }
  // nocommit: add an SPI hack that handles solr.xxx and o.a.solr.analysis.xxx class names via a delegator
  // (there are currently no tests for this functionality)
  private TokenizerFactory loadTokenizerFactory(ResourceLoader loader, String cname){
    TokenizerFactory tokFactory = loader.newInstance(cname, TokenizerFactory.class);
    tokFactory.setLuceneMatchVersion(luceneMatchVersion);
--- a/lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory
+++ b/lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory
@ -85,5 +85,6 @@ org.apache.lucene.analysis.snowball.SnowballPorterFilterFactory
 org.apache.lucene.analysis.standard.ClassicFilterFactory
 org.apache.lucene.analysis.standard.StandardFilterFactory
 org.apache.lucene.analysis.sv.SwedishLightStemFilterFactory
 org.apache.lucene.analysis.synonym.SynonymFilterFactory
 org.apache.lucene.analysis.th.ThaiWordFilterFactory
 org.apache.lucene.analysis.tr.TurkishLowerCaseFilterFactory
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymFilterFactory.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymFilterFactory.java
@ -1,4 +1,4 @@
-package org.apache.solr.analysis;
+package org.apache.lucene.analysis.synonym;
 /*
 * Licensed to the Apache Software Foundation (ASF) under one or more
@ -17,21 +17,16 @@ package org.apache.solr.analysis;
 * limitations under the License.
 */
 import java.io.ByteArrayInputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.StringReader;
 import java.util.Arrays;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.synonym.SynonymFilter;
-import org.apache.lucene.analysis.util.ResourceLoader;
+import org.apache.lucene.analysis.util.ResourceAsStreamResourceLoader;
-import org.apache.solr.core.SolrResourceLoader;
+import org.apache.lucene.analysis.util.StringMockResourceLoader;
 public class TestSynonymFilterFactory extends BaseTokenStreamTestCase {
  /** test that we can parse and use the solr syn file */
@ -41,7 +36,7 @@ public class TestSynonymFilterFactory extends BaseTokenStreamTestCase {
    args.put("synonyms", "synonyms.txt");
    factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
    factory.init(args);
-    factory.inform(new SolrResourceLoader("solr/collection1"));
+    factory.inform(new ResourceAsStreamResourceLoader(getClass()));
    TokenStream ts = factory.create(new MockTokenizer(new StringReader("GB"), MockTokenizer.WHITESPACE, false));
    assertTrue(ts instanceof SynonymFilter);
    assertTokenStreamContents(ts, 
@ -56,28 +51,8 @@ public class TestSynonymFilterFactory extends BaseTokenStreamTestCase {
    args.put("synonyms", "synonyms.txt");
    factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
    factory.init(args);
-    factory.inform(new StringMockSolrResourceLoader("")); // empty file!
+    factory.inform(new StringMockResourceLoader("")); // empty file!
    TokenStream ts = factory.create(new MockTokenizer(new StringReader("GB"), MockTokenizer.WHITESPACE, false));
    assertTokenStreamContents(ts, new String[] { "GB" });
  }
  /**
   * Test-only {@link ResourceLoader} backed by a single in-memory string.
   * The {@code resource} name passed to the lookup methods is ignored: every
   * request is answered from the same {@code text} given at construction time.
   */
  private class StringMockSolrResourceLoader implements ResourceLoader {
    // The content served for every resource request.
    String text;
    /**
     * @param text the content to serve for any resource name
     */
    StringMockSolrResourceLoader(String text) {
      this.text = text;
    }
    /**
     * Returns the backing text split on {@code "\n"}.
     *
     * @param resource ignored
     * @return a fixed-size list view of the split lines; for an empty text
     *         {@code String.split} yields a single empty string, so the list
     *         has one element rather than none
     */
    public List<String> getLines(String resource) throws IOException {
      return Arrays.asList(text.split("\n"));
    }
    /**
     * Not supported by this mock: no class is loaded or instantiated.
     *
     * @return always {@code null}, regardless of the arguments
     */
    public <T> T newInstance(String cname, Class<T> expectedType, String... subpackages) {
      return null;
    }
    /**
     * Opens a new in-memory stream over the UTF-8 encoding of the backing text.
     *
     * @param resource ignored
     * @return a fresh {@link ByteArrayInputStream} on each call
     */
    public InputStream openResource(String resource) throws IOException {
      return new ByteArrayInputStream(text.getBytes("UTF-8"));
    }
  }
 }
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/synonyms.txt
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/synonyms.txt
@ -0,0 +1,31 @@
 # The ASF licenses this file to You under the Apache License, Version 2.0
 # (the "License"); you may not use this file except in compliance with
 # the License.  You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #-----------------------------------------------------------------------
 #some test synonym mappings unlikely to appear in real input text
 aaa => aaaa
 bbb => bbbb1 bbbb2
 ccc => cccc1,cccc2
 a\=>a => b\=>b
 a\,a => b\,b
 fooaaa,baraaa,bazaaa
 # Some synonym groups specific to this example
 GB,gib,gigabyte,gigabytes
 MB,mib,megabyte,megabytes
 Television, Televisions, TV, TVs
 #notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming
 #after us won't split it into two words.
 # Synonym mappings can be used for spelling correction too
 pixima => pixma
--- a/solr/core/src/test/org/apache/solr/analysis/TestMultiWordSynonyms.java
+++ b/solr/core/src/test/org/apache/solr/analysis/TestMultiWordSynonyms.java
@ -20,6 +20,7 @@ package org.apache.solr.analysis;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.synonym.SynonymFilterFactory;
 import org.apache.lucene.analysis.util.ResourceLoader;
 import java.io.ByteArrayInputStream;