diff --git a/modules/analysis/CHANGES.txt b/modules/analysis/CHANGES.txt
index d051127dda4..24599552002 100644
--- a/modules/analysis/CHANGES.txt
+++ b/modules/analysis/CHANGES.txt
@@ -38,6 +38,10 @@ API Changes
since they prevented reuse. Stopwords are now generated at instantiation through
the Analyzer's constructors. (Chris Male)
+ * LUCENE-3434: Removed ShingleAnalyzerWrapper.set* and PerFieldAnalyzerWrapper.addAnalyzer
+ since they prevent reuse. Both Analyzers should be configured at instantiation.
+ (Chris Male)
+
New Features
* LUCENE-2341: A new analyzer/ filter: Morfologik - a dictionary-driven lemmatizer
diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PerFieldAnalyzerWrapper.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PerFieldAnalyzerWrapper.java
index 65f64c2072f..08ec36ac17a 100644
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PerFieldAnalyzerWrapper.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PerFieldAnalyzerWrapper.java
@@ -23,21 +23,25 @@ import org.apache.lucene.index.IndexableField;
import java.io.Reader;
import java.io.IOException;
+import java.util.Collections;
import java.util.Map;
import java.util.HashMap;
/**
* This analyzer is used to facilitate scenarios where different
- * fields require different analysis techniques. Use {@link #addAnalyzer}
- * to add a non-default analyzer on a field name basis.
+ * fields require different analysis techniques. Use the Map
+ * argument in {@link #PerFieldAnalyzerWrapper(Analyzer, java.util.Map)}
+ * to add non-default analyzers for fields.
*
*
Example usage:
*
*
+ * Map analyzerPerField = new HashMap();
+ * analyzerPerField.put("firstname", new KeywordAnalyzer());
+ * analyzerPerField.put("lastname", new KeywordAnalyzer());
+ *
* PerFieldAnalyzerWrapper aWrapper =
- * new PerFieldAnalyzerWrapper(new StandardAnalyzer());
- * aWrapper.addAnalyzer("firstname", new KeywordAnalyzer());
- * aWrapper.addAnalyzer("lastname", new KeywordAnalyzer());
+ * new PerFieldAnalyzerWrapper(new StandardAnalyzer(), analyzerPerField);
*
*
* In this example, StandardAnalyzer will be used for all fields except "firstname"
@@ -47,9 +51,8 @@ import java.util.HashMap;
* and query parsing.
*/
public final class PerFieldAnalyzerWrapper extends Analyzer {
- private Analyzer defaultAnalyzer;
- private Map analyzerMap = new HashMap();
-
+ private final Analyzer defaultAnalyzer;
+ private final Map fieldAnalyzers;
/**
* Constructs with default analyzer.
@@ -70,28 +73,15 @@ public final class PerFieldAnalyzerWrapper extends Analyzer {
* @param fieldAnalyzers a Map (String field name to the Analyzer) to be
* used for those fields
*/
- public PerFieldAnalyzerWrapper(Analyzer defaultAnalyzer,
+ public PerFieldAnalyzerWrapper(Analyzer defaultAnalyzer,
Map fieldAnalyzers) {
this.defaultAnalyzer = defaultAnalyzer;
- if (fieldAnalyzers != null) {
- analyzerMap.putAll(fieldAnalyzers);
- }
- }
-
-
- /**
- * Defines an analyzer to use for the specified field.
- *
- * @param fieldName field name requiring a non-default analyzer
- * @param analyzer non-default analyzer to use for field
- */
- public void addAnalyzer(String fieldName, Analyzer analyzer) {
- analyzerMap.put(fieldName, analyzer);
+ this.fieldAnalyzers = (fieldAnalyzers != null) ? fieldAnalyzers : Collections.emptyMap();
}
@Override
public TokenStream tokenStream(String fieldName, Reader reader) {
- Analyzer analyzer = analyzerMap.get(fieldName);
+ Analyzer analyzer = fieldAnalyzers.get(fieldName);
if (analyzer == null) {
analyzer = defaultAnalyzer;
}
@@ -101,7 +91,7 @@ public final class PerFieldAnalyzerWrapper extends Analyzer {
@Override
public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
- Analyzer analyzer = analyzerMap.get(fieldName);
+ Analyzer analyzer = fieldAnalyzers.get(fieldName);
if (analyzer == null)
analyzer = defaultAnalyzer;
@@ -111,7 +101,7 @@ public final class PerFieldAnalyzerWrapper extends Analyzer {
/** Return the positionIncrementGap from the analyzer assigned to fieldName */
@Override
public int getPositionIncrementGap(String fieldName) {
- Analyzer analyzer = analyzerMap.get(fieldName);
+ Analyzer analyzer = fieldAnalyzers.get(fieldName);
if (analyzer == null)
analyzer = defaultAnalyzer;
return analyzer.getPositionIncrementGap(fieldName);
@@ -120,7 +110,7 @@ public final class PerFieldAnalyzerWrapper extends Analyzer {
/** Return the offsetGap from the analyzer assigned to field */
@Override
public int getOffsetGap(IndexableField field) {
- Analyzer analyzer = analyzerMap.get(field.name());
+ Analyzer analyzer = fieldAnalyzers.get(field.name());
if (analyzer == null) {
analyzer = defaultAnalyzer;
}
@@ -129,6 +119,6 @@ public final class PerFieldAnalyzerWrapper extends Analyzer {
@Override
public String toString() {
- return "PerFieldAnalyzerWrapper(" + analyzerMap + ", default=" + defaultAnalyzer + ")";
+ return "PerFieldAnalyzerWrapper(" + fieldAnalyzers + ", default=" + defaultAnalyzer + ")";
}
}
diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapper.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapper.java
index 2bccdf12e47..217a3622b51 100644
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapper.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapper.java
@@ -34,43 +34,79 @@ import org.apache.lucene.util.Version;
public final class ShingleAnalyzerWrapper extends Analyzer {
private final Analyzer defaultAnalyzer;
- private int maxShingleSize = ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE;
- private int minShingleSize = ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE;
- private String tokenSeparator = ShingleFilter.TOKEN_SEPARATOR;
- private boolean outputUnigrams = true;
- private boolean outputUnigramsIfNoShingles = false;
+ private final int maxShingleSize;
+ private final int minShingleSize;
+ private final String tokenSeparator;
+ private final boolean outputUnigrams;
+ private final boolean outputUnigramsIfNoShingles;
public ShingleAnalyzerWrapper(Analyzer defaultAnalyzer) {
- super();
- this.defaultAnalyzer = defaultAnalyzer;
+ this(defaultAnalyzer, ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE);
}
public ShingleAnalyzerWrapper(Analyzer defaultAnalyzer, int maxShingleSize) {
- this(defaultAnalyzer);
- setMaxShingleSize(maxShingleSize);
+ this(defaultAnalyzer, ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE, maxShingleSize);
}
public ShingleAnalyzerWrapper(Analyzer defaultAnalyzer, int minShingleSize, int maxShingleSize) {
- this(defaultAnalyzer);
- setMaxShingleSize(maxShingleSize);
- setMinShingleSize(minShingleSize);
+ this(defaultAnalyzer, minShingleSize, maxShingleSize, ShingleFilter.TOKEN_SEPARATOR, true, false);
+ }
+
+ /**
+ * Creates a new ShingleAnalyzerWrapper
+ *
+ * @param defaultAnalyzer Analyzer whose TokenStream is to be filtered
+ * @param minShingleSize Min shingle (token ngram) size
+ * @param maxShingleSize Max shingle size
+ * @param tokenSeparator Used to separate input stream tokens in output shingles
+ * @param outputUnigrams Whether or not the filter shall pass the original
+ * tokens to the output stream
+ * @param outputUnigramsIfNoShingles Overrides the behavior of outputUnigrams==false for those
+ * times when no shingles are available (because there are fewer than
+ * minShingleSize tokens in the input stream)?
+ * Note that if outputUnigrams==true, then unigrams are always output,
+ * regardless of whether any shingles are available.
+ */
+ public ShingleAnalyzerWrapper(
+ Analyzer defaultAnalyzer,
+ int minShingleSize,
+ int maxShingleSize,
+ String tokenSeparator,
+ boolean outputUnigrams,
+ boolean outputUnigramsIfNoShingles) {
+ this.defaultAnalyzer = defaultAnalyzer;
+
+ if (maxShingleSize < 2) {
+ throw new IllegalArgumentException("Max shingle size must be >= 2");
+ }
+ this.maxShingleSize = maxShingleSize;
+
+ if (minShingleSize < 2) {
+ throw new IllegalArgumentException("Min shingle size must be >= 2");
+ }
+ if (minShingleSize > maxShingleSize) {
+ throw new IllegalArgumentException
+ ("Min shingle size must be <= max shingle size");
+ }
+ this.minShingleSize = minShingleSize;
+
+ this.tokenSeparator = (tokenSeparator == null ? "" : tokenSeparator);
+ this.outputUnigrams = outputUnigrams;
+ this.outputUnigramsIfNoShingles = outputUnigramsIfNoShingles;
}
/**
* Wraps {@link StandardAnalyzer}.
*/
public ShingleAnalyzerWrapper(Version matchVersion) {
- super();
- this.defaultAnalyzer = new StandardAnalyzer(matchVersion);
+ this(matchVersion, ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE, ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE);
}
/**
* Wraps {@link StandardAnalyzer}.
*/
public ShingleAnalyzerWrapper(Version matchVersion, int minShingleSize, int maxShingleSize) {
- this(matchVersion);
- setMaxShingleSize(maxShingleSize);
- setMinShingleSize(minShingleSize);
+ this(new StandardAnalyzer(matchVersion), minShingleSize, maxShingleSize);
}
/**
@@ -82,18 +118,6 @@ public final class ShingleAnalyzerWrapper extends Analyzer {
return maxShingleSize;
}
- /**
- * Set the maximum size of output shingles (default: 2)
- *
- * @param maxShingleSize max shingle size
- */
- public void setMaxShingleSize(int maxShingleSize) {
- if (maxShingleSize < 2) {
- throw new IllegalArgumentException("Max shingle size must be >= 2");
- }
- this.maxShingleSize = maxShingleSize;
- }
-
/**
* The min shingle (token ngram) size
*
@@ -103,69 +127,17 @@ public final class ShingleAnalyzerWrapper extends Analyzer {
return minShingleSize;
}
- /**
- * Set the min shingle size (default: 2).
- *
This method requires that the passed in minShingleSize is not greater
- * than maxShingleSize, so make sure that maxShingleSize is set before
- * calling this method.
- *
- * @param minShingleSize min size of output shingles
- */
- public void setMinShingleSize(int minShingleSize) {
- if (minShingleSize < 2) {
- throw new IllegalArgumentException("Min shingle size must be >= 2");
- }
- if (minShingleSize > maxShingleSize) {
- throw new IllegalArgumentException
- ("Min shingle size must be <= max shingle size");
- }
- this.minShingleSize = minShingleSize;
- }
-
public String getTokenSeparator() {
return tokenSeparator;
}
-
- /**
- * Sets the string to use when joining adjacent tokens to form a shingle
- * @param tokenSeparator used to separate input stream tokens in output shingles
- */
- public void setTokenSeparator(String tokenSeparator) {
- this.tokenSeparator = (tokenSeparator == null ? "" : tokenSeparator);
- }
public boolean isOutputUnigrams() {
return outputUnigrams;
}
-
- /**
- * Shall the filter pass the original tokens (the "unigrams") to the output
- * stream?
- *
- * @param outputUnigrams Whether or not the filter shall pass the original
- * tokens to the output stream
- */
- public void setOutputUnigrams(boolean outputUnigrams) {
- this.outputUnigrams = outputUnigrams;
- }
public boolean isOutputUnigramsIfNoShingles() {
return outputUnigramsIfNoShingles;
}
-
- /**
- *
Shall we override the behavior of outputUnigrams==false for those
- * times when no shingles are available (because there are fewer than
- * minShingleSize tokens in the input stream)? (default: false.)
- *
Note that if outputUnigrams==true, then unigrams are always output,
- * regardless of whether any shingles are available.
- *
- * @param outputUnigramsIfNoShingles Whether or not to output a single
- * unigram when no shingles are available.
- */
- public void setOutputUnigramsIfNoShingles(boolean outputUnigramsIfNoShingles) {
- this.outputUnigramsIfNoShingles = outputUnigramsIfNoShingles;
- }
@Override
public TokenStream tokenStream(String fieldName, Reader reader) {
diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPerFieldAnalzyerWrapper.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPerFieldAnalzyerWrapper.java
index fa2c51d2d1a..6f9ad44d8c4 100644
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPerFieldAnalzyerWrapper.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPerFieldAnalzyerWrapper.java
@@ -1,6 +1,8 @@
package org.apache.lucene.analysis.miscellaneous;
import java.io.StringReader;
+import java.util.HashMap;
+import java.util.Map;
import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.core.SimpleAnalyzer;
@@ -27,9 +29,12 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
public class TestPerFieldAnalzyerWrapper extends BaseTokenStreamTestCase {
public void testPerField() throws Exception {
String text = "Qwerty";
+
+ Map analyzerPerField = new HashMap();
+ analyzerPerField.put("special", new SimpleAnalyzer(TEST_VERSION_CURRENT));
+
PerFieldAnalyzerWrapper analyzer =
- new PerFieldAnalyzerWrapper(new WhitespaceAnalyzer(TEST_VERSION_CURRENT));
- analyzer.addAnalyzer("special", new SimpleAnalyzer(TEST_VERSION_CURRENT));
+ new PerFieldAnalyzerWrapper(new WhitespaceAnalyzer(TEST_VERSION_CURRENT), analyzerPerField);
TokenStream tokenStream = analyzer.tokenStream("field",
new StringReader(text));
diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapperTest.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapperTest.java
index e4b4f35fe03..aaade9fd350 100644
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapperTest.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapperTest.java
@@ -17,7 +17,6 @@ package org.apache.lucene.analysis.shingle;
* limitations under the License.
*/
-import java.io.Reader;
import java.io.StringReader;
import org.apache.lucene.analysis.Analyzer;
@@ -162,7 +161,9 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase {
new int[] { 0, 0, 0, 7, 7, 7, 14, 14, 14, 19, 19, 28, 33 },
new int[] { 6, 18, 27, 13, 27, 32, 18, 32, 41, 27, 41, 32, 41 },
new int[] { 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1 });
- analyzer.setOutputUnigrams(false);
+
+ analyzer = new ShingleAnalyzerWrapper(
+ new MockAnalyzer(random, MockTokenizer.WHITESPACE, false), 3, 4, ShingleFilter.TOKEN_SEPARATOR, false, false);
assertAnalyzesToReuse(analyzer, "please divide this sentence into shingles",
new String[] { "please divide this", "please divide this sentence",
"divide this sentence", "divide this sentence into",
@@ -186,7 +187,9 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase {
new int[] { 0, 0, 7, 7, 14, 14, 19, 19, 28, 33 },
new int[] { 6, 18, 13, 27, 18, 32, 27, 41, 32, 41 },
new int[] { 1, 0, 1, 0, 1, 0, 1, 0, 1, 1 });
- analyzer.setOutputUnigrams(false);
+
+ analyzer = new ShingleAnalyzerWrapper(
+ new MockAnalyzer(random, MockTokenizer.WHITESPACE, false), 3, 3, ShingleFilter.TOKEN_SEPARATOR, false, false);
assertAnalyzesToReuse(analyzer, "please divide this sentence into shingles",
new String[] { "please divide this",
"divide this sentence",
@@ -198,9 +201,11 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase {
}
public void testNoTokenSeparator() throws Exception {
- ShingleAnalyzerWrapper analyzer
- = new ShingleAnalyzerWrapper(new MockAnalyzer(random, MockTokenizer.WHITESPACE, false));
- analyzer.setTokenSeparator("");
+ ShingleAnalyzerWrapper analyzer = new ShingleAnalyzerWrapper(
+ new MockAnalyzer(random, MockTokenizer.WHITESPACE, false),
+ ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE,
+ ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE,
+ "", true, false);
assertAnalyzesToReuse(analyzer, "please divide into shingles",
new String[] { "please", "pleasedivide",
"divide", "divideinto",
@@ -209,7 +214,12 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase {
new int[] { 0, 0, 7, 7, 14, 14, 19 },
new int[] { 6, 13, 13, 18, 18, 27, 27 },
new int[] { 1, 0, 1, 0, 1, 0, 1 });
- analyzer.setOutputUnigrams(false);
+
+ analyzer = new ShingleAnalyzerWrapper(
+ new MockAnalyzer(random, MockTokenizer.WHITESPACE, false),
+ ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE,
+ ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE,
+ "", false, false);
assertAnalyzesToReuse(analyzer, "please divide into shingles",
new String[] { "pleasedivide",
"divideinto",
@@ -220,9 +230,11 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase {
}
public void testNullTokenSeparator() throws Exception {
- ShingleAnalyzerWrapper analyzer
- = new ShingleAnalyzerWrapper(new MockAnalyzer(random, MockTokenizer.WHITESPACE, false));
- analyzer.setTokenSeparator(null);
+ ShingleAnalyzerWrapper analyzer = new ShingleAnalyzerWrapper(
+ new MockAnalyzer(random, MockTokenizer.WHITESPACE, false),
+ ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE,
+ ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE,
+ null, true, false);
assertAnalyzesToReuse(analyzer, "please divide into shingles",
new String[] { "please", "pleasedivide",
"divide", "divideinto",
@@ -231,7 +243,12 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase {
new int[] { 0, 0, 7, 7, 14, 14, 19 },
new int[] { 6, 13, 13, 18, 18, 27, 27 },
new int[] { 1, 0, 1, 0, 1, 0, 1 });
- analyzer.setOutputUnigrams(false);
+
+ analyzer = new ShingleAnalyzerWrapper(
+ new MockAnalyzer(random, MockTokenizer.WHITESPACE, false),
+ ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE,
+ ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE,
+ "", false, false);
assertAnalyzesToReuse(analyzer, "please divide into shingles",
new String[] { "pleasedivide",
"divideinto",
@@ -241,9 +258,11 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase {
new int[] { 1, 1, 1 });
}
public void testAltTokenSeparator() throws Exception {
- ShingleAnalyzerWrapper analyzer
- = new ShingleAnalyzerWrapper(new MockAnalyzer(random, MockTokenizer.WHITESPACE, false));
- analyzer.setTokenSeparator("");
+ ShingleAnalyzerWrapper analyzer = new ShingleAnalyzerWrapper(
+ new MockAnalyzer(random, MockTokenizer.WHITESPACE, false),
+ ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE,
+ ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE,
+ "", true, false);
assertAnalyzesToReuse(analyzer, "please divide into shingles",
new String[] { "please", "pleasedivide",
"divide", "divideinto",
@@ -252,7 +271,12 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase {
new int[] { 0, 0, 7, 7, 14, 14, 19 },
new int[] { 6, 13, 13, 18, 18, 27, 27 },
new int[] { 1, 0, 1, 0, 1, 0, 1 });
- analyzer.setOutputUnigrams(false);
+
+ analyzer = new ShingleAnalyzerWrapper(
+ new MockAnalyzer(random, MockTokenizer.WHITESPACE, false),
+ ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE,
+ ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE,
+ "", false, false);
assertAnalyzesToReuse(analyzer, "please divide into shingles",
new String[] { "pleasedivide",
"divideinto",
@@ -263,10 +287,11 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase {
}
public void testOutputUnigramsIfNoShinglesSingleToken() throws Exception {
- ShingleAnalyzerWrapper analyzer
- = new ShingleAnalyzerWrapper(new MockAnalyzer(random, MockTokenizer.WHITESPACE, false));
- analyzer.setOutputUnigrams(false);
- analyzer.setOutputUnigramsIfNoShingles(true);
+ ShingleAnalyzerWrapper analyzer = new ShingleAnalyzerWrapper(
+ new MockAnalyzer(random, MockTokenizer.WHITESPACE, false),
+ ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE,
+ ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE,
+ "", false, true);
assertAnalyzesToReuse(analyzer, "please",
new String[] { "please" },
new int[] { 0 },
diff --git a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/NewShingleAnalyzerTask.java b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/NewShingleAnalyzerTask.java
index a4af36ae12d..6dfd4cf0817 100644
--- a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/NewShingleAnalyzerTask.java
+++ b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/NewShingleAnalyzerTask.java
@@ -21,6 +21,7 @@ import java.util.StringTokenizer;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.shingle.ShingleAnalyzerWrapper;
+import org.apache.lucene.analysis.shingle.ShingleFilter;
import org.apache.lucene.benchmark.byTask.PerfRunData;
/**
@@ -64,9 +65,14 @@ public class NewShingleAnalyzerTask extends PerfTask {
}
wrappedAnalyzer = NewAnalyzerTask.createAnalyzer(analyzerClassName);
}
- ShingleAnalyzerWrapper analyzer
- = new ShingleAnalyzerWrapper(wrappedAnalyzer, maxShingleSize);
- analyzer.setOutputUnigrams(outputUnigrams);
+
+ ShingleAnalyzerWrapper analyzer = new ShingleAnalyzerWrapper(
+ wrappedAnalyzer,
+ ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE,
+ maxShingleSize,
+ ShingleFilter.TOKEN_SEPARATOR,
+ outputUnigrams,
+ false);
getRunData().setAnalyzer(analyzer);
}