LUCENE-2413: consolidate commongrams into contrib/analyzers

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@940761 13f79535-47bb-0310-9956-ffa450edef68
2010-05-04 07:50:41 +00:00 · 2010-05-04 07:50:41 +00:00 · 1d1331fa03
parent 435cc41758
commit 1d1331fa03
7 changed files with 48 additions and 16 deletions
--- a/lucene/contrib/CHANGES.txt
+++ b/lucene/contrib/CHANGES.txt
@ -155,6 +155,12 @@ New features
   of AttributeSource.cloneAttributes() instances and the new copyTo() method.
   (Steven Rowe via Uwe Schindler)
 * LUCENE-2413: Consolidated Solr analysis components into contrib/analyzers. 
   New features from Solr now available to Lucene users include:
   - o.a.l.analysis.commongrams: Constructs n-grams for frequently occurring terms
     and phrases. 
   (... in progress)
 Build
 * LUCENE-2124: Moved the JDK-based collation support from contrib/collation 
--- a/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilter.java
+++ b/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilter.java
@ -7,7 +7,7 @@
 * See the License for the specific language governing permissions and limitations under the License. 
 */
-package org.apache.solr.analysis;
+package org.apache.lucene.analysis.commongrams;
 import java.io.IOException;
 import java.util.Arrays;
--- a/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsQueryFilter.java
+++ b/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsQueryFilter.java
@ -14,7 +14,7 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
-package org.apache.solr.analysis;
+package org.apache.lucene.analysis.commongrams;
 import java.io.IOException;
@ -22,7 +22,7 @@ import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
-import static org.apache.solr.analysis.CommonGramsFilter.GRAM_TYPE;
+import static org.apache.lucene.analysis.commongrams.CommonGramsFilter.GRAM_TYPE;
 /**
 * Wrap a CommonGramsFilter optimizing phrase queries by only returning single
--- a/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/commongrams/package.html
+++ b/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/commongrams/package.html
@ -0,0 +1,22 @@
 <!doctype html public "-//w3c//dtd html 4.0 transitional//en">
 <!--
 Licensed to the Apache Software Foundation (ASF) under one or more
 contributor license agreements.  See the NOTICE file distributed with
 this work for additional information regarding copyright ownership.
 The ASF licenses this file to You under the Apache License, Version 2.0
 (the "License"); you may not use this file except in compliance with
 the License.  You may obtain a copy of the License at
     http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 -->
 <html><head></head>
 <body>
 Construct n-grams for frequently occurring terms and phrases.
 </body>
 </html>
--- a/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/commongrams/CommonGramsFilterTest.java
+++ b/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/commongrams/CommonGramsFilterTest.java
@ -14,28 +14,29 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
-package org.apache.solr.analysis;
+package org.apache.lucene.analysis.commongrams;
 import java.io.Reader;
 import java.io.StringReader;
 import java.util.Set;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.WhitespaceTokenizer;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 /**
- * Tests CommonGramsQueryFilter
+ * Tests CommonGrams(Query)Filter
 */
-public class CommonGramsFilterTest extends BaseTokenTestCase {
+public class CommonGramsFilterTest extends BaseTokenStreamTestCase {
  private static final String[] commonWords = { "s", "a", "b", "c", "d", "the",
      "of" };
  public void testReset() throws Exception {
    final String input = "How the s a brown s cow d like A B thing?";
-    WhitespaceTokenizer wt = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader(input));
+    WhitespaceTokenizer wt = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
    CommonGramsFilter cgf = new CommonGramsFilter(wt, commonWords);
    CharTermAttribute term = cgf.addAttribute(CharTermAttribute.class);
@ -56,7 +57,7 @@ public class CommonGramsFilterTest extends BaseTokenTestCase {
  public void testQueryReset() throws Exception {
    final String input = "How the s a brown s cow d like A B thing?";
-    WhitespaceTokenizer wt = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader(input));
+    WhitespaceTokenizer wt = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
    CommonGramsFilter cgf = new CommonGramsFilter(wt, commonWords);
    CommonGramsQueryFilter nsf = new CommonGramsQueryFilter(cgf);
@ -88,7 +89,7 @@ public class CommonGramsFilterTest extends BaseTokenTestCase {
      @Override
      public TokenStream tokenStream(String field, Reader in) {
        return new CommonGramsQueryFilter(new CommonGramsFilter(
-            new WhitespaceTokenizer(DEFAULT_VERSION, in), commonWords));
+            new WhitespaceTokenizer(TEST_VERSION_CURRENT, in), commonWords));
      } 
    };
@ -157,7 +158,7 @@ public class CommonGramsFilterTest extends BaseTokenTestCase {
      @Override
      public TokenStream tokenStream(String field, Reader in) {
        return new CommonGramsFilter(
-            new WhitespaceTokenizer(DEFAULT_VERSION, in), commonWords);
+            new WhitespaceTokenizer(TEST_VERSION_CURRENT, in), commonWords);
      } 
    };
@ -243,7 +244,7 @@ public class CommonGramsFilterTest extends BaseTokenTestCase {
   */
  public void testCaseSensitive() throws Exception {
    final String input = "How The s a brown s cow d like A B thing?";
-    WhitespaceTokenizer wt = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader(input));
+    WhitespaceTokenizer wt = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
    Set common = CommonGramsFilter.makeCommonSet(commonWords);
    TokenFilter cgf = new CommonGramsFilter(wt, common, false);
    assertTokenStreamContents(cgf, new String[] {"How", "The", "The_s", "s",
@ -256,7 +257,7 @@ public class CommonGramsFilterTest extends BaseTokenTestCase {
   */
  public void testLastWordisStopWord() throws Exception {
    final String input = "dog the";
-    WhitespaceTokenizer wt = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader(input));
+    WhitespaceTokenizer wt = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
    CommonGramsFilter cgf = new CommonGramsFilter(wt, commonWords);
    TokenFilter nsf = new CommonGramsQueryFilter(cgf);
    assertTokenStreamContents(nsf, new String[] { "dog_the" });
@ -267,7 +268,7 @@ public class CommonGramsFilterTest extends BaseTokenTestCase {
   */
  public void testFirstWordisStopWord() throws Exception {
    final String input = "the dog";
-    WhitespaceTokenizer wt = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader(input));
+    WhitespaceTokenizer wt = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
    CommonGramsFilter cgf = new CommonGramsFilter(wt, commonWords);
    TokenFilter nsf = new CommonGramsQueryFilter(cgf);
    assertTokenStreamContents(nsf, new String[] { "the_dog" });
@ -278,7 +279,7 @@ public class CommonGramsFilterTest extends BaseTokenTestCase {
   */
  public void testOneWordQueryStopWord() throws Exception {
    final String input = "the";
-    WhitespaceTokenizer wt = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader(input));
+    WhitespaceTokenizer wt = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
    CommonGramsFilter cgf = new CommonGramsFilter(wt, commonWords);
    TokenFilter nsf = new CommonGramsQueryFilter(cgf);
    assertTokenStreamContents(nsf, new String[] { "the" });
@ -289,7 +290,7 @@ public class CommonGramsFilterTest extends BaseTokenTestCase {
   */
  public void testOneWordQuery() throws Exception {
    final String input = "monster";
-    WhitespaceTokenizer wt = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader(input));
+    WhitespaceTokenizer wt = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
    CommonGramsFilter cgf = new CommonGramsFilter(wt, commonWords);
    TokenFilter nsf = new CommonGramsQueryFilter(cgf);
    assertTokenStreamContents(nsf, new String[] { "monster" });
@ -300,7 +301,7 @@ public class CommonGramsFilterTest extends BaseTokenTestCase {
   */
  public void TestFirstAndLastStopWord() throws Exception {
    final String input = "the of";
-    WhitespaceTokenizer wt = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader(input));
+    WhitespaceTokenizer wt = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
    CommonGramsFilter cgf = new CommonGramsFilter(wt, commonWords);
    TokenFilter nsf = new CommonGramsQueryFilter(cgf);
    assertTokenStreamContents(nsf, new String[] { "the_of" });
--- a/solr/src/java/org/apache/solr/analysis/CommonGramsFilterFactory.java
+++ b/solr/src/java/org/apache/solr/analysis/CommonGramsFilterFactory.java
@ -22,6 +22,7 @@ import java.util.Set;
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.StopAnalyzer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.commongrams.CommonGramsFilter;
 import org.apache.solr.common.ResourceLoader;
 import org.apache.solr.util.plugin.ResourceLoaderAware;
--- a/solr/src/java/org/apache/solr/analysis/CommonGramsQueryFilterFactory.java
+++ b/solr/src/java/org/apache/solr/analysis/CommonGramsQueryFilterFactory.java
@ -23,6 +23,8 @@ import java.util.Set;
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.StopAnalyzer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.commongrams.CommonGramsFilter;
 import org.apache.lucene.analysis.commongrams.CommonGramsQueryFilter;
 import org.apache.solr.common.ResourceLoader;
 import org.apache.solr.util.plugin.ResourceLoaderAware;