diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 3b7a34867e3..35c4cba3e37 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -144,6 +144,16 @@ API Changes
instead of extending DocIdSetIterator. asTwoPhaseIterator() has been renamed
to twoPhaseIterator() for consistency. (Adrien Grand)
+* LUCENE-6973: TeeSinkTokenFilter no longer accepts a SinkFilter (the latter
+ has been removed). If you wish to filter the sinks, you can wrap them with
+ any other TokenFilter (e.g. a FilteringTokenFilter). Also, you can no longer
+ add a SinkTokenStream to an existing TeeSinkTokenFilter. If you need to
+ share multiple streams with a single sink, chain them with multiple
+ TeeSinkTokenFilters.
+ DateRecognizerSinkFilter was renamed to DateRecognizerFilter and moved under
+ analysis/common. TokenTypeSinkFilter was removed (use TypeTokenFilter instead).
+ TokenRangeSinkFilter was removed. (Shai Erera, Uwe Schindler)
+
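
  By way of migration, a minimal sketch of the wrapping pattern described above. It assumes
  "tokenizer" is whatever TokenStream previously fed the tee, and uses TypeTokenFilter in place
  of the removed TokenTypeSinkFilter; the "date" type is illustrative only:

      // Before (removed API): the sink filtered tokens through a SinkFilter.
      //   TeeSinkTokenFilter.SinkTokenStream sink = tee.newSinkTokenStream(new TokenTypeSinkFilter("date"));
      // After: every sink receives all tokens; wrap it with an ordinary TokenFilter.
      TeeSinkTokenFilter tee = new TeeSinkTokenFilter(tokenizer);
      TokenStream sink = new TypeTokenFilter(tee.newSinkTokenStream(),
          Collections.singleton("date"), true); // useWhiteList=true keeps only the matching types
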
Optimizations
* LUCENE-6951: Improve GeoPointInPolygonQuery using point orientation based
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/DateRecognizerFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/DateRecognizerFilter.java
new file mode 100644
index 00000000000..52763f45ecf
--- /dev/null
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/DateRecognizerFilter.java
@@ -0,0 +1,61 @@
+package org.apache.lucene.analysis.miscellaneous;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.text.DateFormat;
+import java.text.ParseException;
+import java.util.Locale;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.util.FilteringTokenFilter;
+
+/** Filters out all tokens that cannot be parsed as a date, using the provided {@link DateFormat}. */
+public class DateRecognizerFilter extends FilteringTokenFilter {
+
+ public static final String DATE_TYPE = "date";
+
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+ private final DateFormat dateFormat;
+
+ /**
+ * Uses {@link DateFormat#DEFAULT} and {@link Locale#ENGLISH} to create a {@link DateFormat} instance.
+ */
+ public DateRecognizerFilter(TokenStream input) {
+ this(input, null);
+ }
+
+ public DateRecognizerFilter(TokenStream input, DateFormat dateFormat) {
+ super(input);
+ this.dateFormat = dateFormat != null ? dateFormat : DateFormat.getDateInstance(DateFormat.DEFAULT, Locale.ENGLISH);
+ }
+
+ @Override
+ public boolean accept() {
+ try {
+ // We don't care about the date, just that the term can be parsed to one.
+ dateFormat.parse(termAtt.toString());
+ return true;
+ } catch (ParseException e) {
+ // This term is not a date.
+ }
+
+ return false;
+ }
+
+}
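
  For orientation, a small self-contained sketch of the new filter; the sample text and date
  pattern mirror the test added below, and the printing loop is only for demonstration:

      import java.io.StringReader;
      import java.text.SimpleDateFormat;
      import java.util.Locale;

      import org.apache.lucene.analysis.TokenStream;
      import org.apache.lucene.analysis.core.WhitespaceTokenizer;
      import org.apache.lucene.analysis.miscellaneous.DateRecognizerFilter;
      import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

      public class DateRecognizerFilterExample {
        public static void main(String[] args) throws Exception {
          WhitespaceTokenizer tokenizer = new WhitespaceTokenizer();
          tokenizer.setReader(new StringReader("released on 7/11/2006 and patched on 7/12/2006"));
          // Keep only the terms that parse as MM/dd/yyyy dates; all other tokens are dropped.
          try (TokenStream ts = new DateRecognizerFilter(tokenizer,
              new SimpleDateFormat("MM/dd/yyyy", Locale.ENGLISH))) {
            CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
            ts.reset();
            while (ts.incrementToken()) {
              System.out.println(term); // prints 7/11/2006, then 7/12/2006
            }
            ts.end();
          }
        }
      }
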
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/DateRecognizerFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/DateRecognizerFilterFactory.java
new file mode 100755
index 00000000000..7f02b8751b1
--- /dev/null
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/DateRecognizerFilterFactory.java
@@ -0,0 +1,84 @@
+package org.apache.lucene.analysis.miscellaneous;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.text.DateFormat;
+import java.text.SimpleDateFormat;
+import java.util.Locale;
+import java.util.Map;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+
+/**
+ * Factory for {@link DateRecognizerFilter}.
+ *
+ * <pre class="prettyprint">
+ * &lt;fieldType name="text_filter_none_date" class="solr.TextField" positionIncrementGap="100"&gt;
+ *   &lt;analyzer&gt;
+ *     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
+ *     &lt;filter class="solr.DateRecognizerFilterFactory" datePattern="yyyy/mm/dd" locale="en-US" /&gt;
+ *   &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;
+ * </pre>
+ *
+ * <p>
+ * The {@code datePattern} is optional. If omitted, {@link DateRecognizerFilter} will be created with the default date
+ * format of the system. The {@code locale} is optional and if omitted the filter will be created with
+ * {@link Locale#ENGLISH}.
+ */
+public class DateRecognizerFilterFactory extends TokenFilterFactory {
+
+ public static final String DATE_PATTERN = "datePattern";
+ public static final String LOCALE = "locale";
+
+ private final DateFormat dateFormat;
+ private final Locale locale;
+
+ /** Creates a new DateRecognizerFilterFactory */
+ public DateRecognizerFilterFactory(Map<String, String> args) {
+ super(args);
+ this.locale = getLocale(get(args, LOCALE));
+ this.dateFormat = getDateFormat(get(args, DATE_PATTERN));
+ if (!args.isEmpty()) {
+ throw new IllegalArgumentException("Unknown parameters: " + args);
+ }
+ }
+
+ @Override
+ public TokenStream create(TokenStream input) {
+ return new DateRecognizerFilter(input, dateFormat);
+ }
+
+ private Locale getLocale(String localeStr) {
+ if (localeStr == null) {
+ return Locale.ENGLISH;
+ } else {
+ return new Locale.Builder().setLanguageTag(localeStr).build();
+ }
+ }
+
+ public DateFormat getDateFormat(String datePattern) {
+ if (datePattern != null) {
+ return new SimpleDateFormat(datePattern, locale);
+ } else {
+ return DateFormat.getDateInstance(DateFormat.DEFAULT, locale);
+ }
+ }
+
+}
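
  A programmatic sketch of the factory's contract; the args map mirrors the schema attributes in
  the javadoc above, and "tokenizer" stands for any TokenStream you already have. Note that the
  constructor consumes the map and rejects leftover keys:

      Map<String, String> args = new HashMap<>();
      args.put(DateRecognizerFilterFactory.DATE_PATTERN, "yyyy/mm/dd");
      args.put(DateRecognizerFilterFactory.LOCALE, "en-US"); // a BCP 47 language tag, not "en_US"
      DateRecognizerFilterFactory factory = new DateRecognizerFilterFactory(args);
      TokenStream filtered = factory.create(tokenizer); // wraps the input in a DateRecognizerFilter
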
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/sinks/DateRecognizerSinkFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/sinks/DateRecognizerSinkFilter.java
deleted file mode 100644
index d73f86660be..00000000000
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/sinks/DateRecognizerSinkFilter.java
+++ /dev/null
@@ -1,69 +0,0 @@
-package org.apache.lucene.analysis.sinks;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.text.DateFormat;
-import java.text.ParseException;
-import java.util.Date;
-import java.util.Locale;
-
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.util.AttributeSource;
-
-/**
- * Attempts to parse the {@link CharTermAttribute#buffer()} as a Date using a {@link java.text.DateFormat}.
- * If the value is a Date, it will add it to the sink.
- *
- **/
-public class DateRecognizerSinkFilter extends TeeSinkTokenFilter.SinkFilter {
- public static final String DATE_TYPE = "date";
-
- protected DateFormat dateFormat;
- protected CharTermAttribute termAtt;
-
- /**
- * Uses {@link java.text.DateFormat#getDateInstance(int, Locale)
- * DateFormat#getDateInstance(DateFormat.DEFAULT, Locale.ROOT)} as
- * the {@link java.text.DateFormat} object.
- */
- public DateRecognizerSinkFilter() {
- this(DateFormat.getDateInstance(DateFormat.DEFAULT, Locale.ROOT));
- }
-
- public DateRecognizerSinkFilter(DateFormat dateFormat) {
- this.dateFormat = dateFormat;
- }
-
- @Override
- public boolean accept(AttributeSource source) {
- if (termAtt == null) {
- termAtt = source.addAttribute(CharTermAttribute.class);
- }
- try {
- Date date = dateFormat.parse(termAtt.toString());//We don't care about the date, just that we can parse it as a date
- if (date != null) {
- return true;
- }
- } catch (ParseException e) {
-
- }
-
- return false;
- }
-
-}
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/sinks/TeeSinkTokenFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/sinks/TeeSinkTokenFilter.java
index a6a9b579e9b..5c30c590cf6 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/sinks/TeeSinkTokenFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/sinks/TeeSinkTokenFilter.java
@@ -18,233 +18,155 @@ package org.apache.lucene.analysis.sinks;
*/
import java.io.IOException;
-import java.lang.ref.WeakReference;
+import java.util.ArrayList;
import java.util.Iterator;
-import java.util.LinkedList;
import java.util.List;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.AttributeSource;
/**
- * This TokenFilter provides the ability to set aside attribute states
- * that have already been analyzed. This is useful in situations where multiple fields share
- * many common analysis steps and then go their separate ways.
+ * This TokenFilter provides the ability to set aside attribute states that have already been analyzed. This is useful
+ * in situations where multiple fields share many common analysis steps and then go their separate ways.
+ * <p>
 *
- * It is also useful for doing things like entity extraction or proper noun analysis as
- * part of the analysis workflow and saving off those tokens for use in another field.
+ * It is also useful for doing things like entity extraction or proper noun analysis as part of the analysis workflow
+ * and saving off those tokens for use in another field.
+ * <p>
 *
 * <pre class="prettyprint">
-TeeSinkTokenFilter source1 = new TeeSinkTokenFilter(new WhitespaceTokenizer(version, reader1));
-TeeSinkTokenFilter.SinkTokenStream sink1 = source1.newSinkTokenStream();
-TeeSinkTokenFilter.SinkTokenStream sink2 = source1.newSinkTokenStream();
-
-TeeSinkTokenFilter source2 = new TeeSinkTokenFilter(new WhitespaceTokenizer(version, reader2));
-source2.addSinkTokenStream(sink1);
-source2.addSinkTokenStream(sink2);
-
-TokenStream final1 = new LowerCaseFilter(version, source1);
-TokenStream final2 = source2;
-TokenStream final3 = new EntityDetect(sink1);
-TokenStream final4 = new URLDetect(sink2);
-
-d.add(new TextField("f1", final1, Field.Store.NO));
-d.add(new TextField("f2", final2, Field.Store.NO));
-d.add(new TextField("f3", final3, Field.Store.NO));
-d.add(new TextField("f4", final4, Field.Store.NO));
+ * TeeSinkTokenFilter source1 = new TeeSinkTokenFilter(new WhitespaceTokenizer());
+ * TeeSinkTokenFilter.SinkTokenStream sink1 = source1.newSinkTokenStream();
+ * TeeSinkTokenFilter.SinkTokenStream sink2 = source1.newSinkTokenStream();
+ *
+ * TokenStream final1 = new LowerCaseFilter(source1);
+ * TokenStream final2 = new EntityDetect(sink1);
+ * TokenStream final3 = new URLDetect(sink2);
+ *
+ * d.add(new TextField("f1", final1));
+ * d.add(new TextField("f2", final2));
+ * d.add(new TextField("f3", final3));
 * </pre>
- * In this example, <code>sink1</code> and <code>sink2</code> will both get tokens from both
- * <code>reader1</code> and <code>reader2</code> after whitespace tokenizer
- * and now we can further wrap any of these in extra analysis, and more "sources" can be inserted if desired.
- * It is important, that tees are consumed before sinks (in the above example, the field names must be
- * less the sink's field names). If you are not sure, which stream is consumed first, you can simply
- * add another sink and then pass all tokens to the sinks at once using {@link #consumeAllTokens}.
- * This TokenFilter is exhausted after this. In the above example, change
- * the example above to:
- * <pre class="prettyprint">
-...
-TokenStream final1 = new LowerCaseFilter(version, source1.newSinkTokenStream());
-TokenStream final2 = source2.newSinkTokenStream();
-sink1.consumeAllTokens();
-sink2.consumeAllTokens();
-...
- * </pre>
- * In this case, the fields can be added in any order, because the sources are not used anymore and all sinks are ready.
- * Note, the EntityDetect and URLDetect TokenStreams are for the example and do not currently exist in Lucene.
+ * <p>
+ * In this example, {@code sink1} and {@code sink2} will both get tokens from {@code source1} after whitespace
+ * tokenization, and will further do additional token filtering, e.g. detect entities and URLs.
+ * <p>
+ * <b>NOTE:</b> it is important that tees are consumed before sinks, therefore you should add them to the document
+ * before the sinks. In the above example, <i>f1</i> is added before the other fields, and so by the time they are
+ * processed, it has already been consumed, which is the correct way to index the three streams. If for some reason you
+ * cannot ensure that, you should call {@link #consumeAllTokens()} before adding the sinks to document fields.
 */
public final class TeeSinkTokenFilter extends TokenFilter {
- private final List<WeakReference<SinkTokenStream>> sinks = new LinkedList<>();
-
- /**
- * Instantiates a new TeeSinkTokenFilter.
- */
+
+ private final States cachedStates = new States();
+
public TeeSinkTokenFilter(TokenStream input) {
super(input);
}
- /**
- * Returns a new {@link SinkTokenStream} that receives all tokens consumed by this stream.
- */
- public SinkTokenStream newSinkTokenStream() {
- return newSinkTokenStream(ACCEPT_ALL_FILTER);
+ /** Returns a new {@link SinkTokenStream} that receives all tokens consumed by this stream. */
+ public TokenStream newSinkTokenStream() {
+ return new SinkTokenStream(this.cloneAttributes(), cachedStates);
}
-
+
/**
- * Returns a new {@link SinkTokenStream} that receives all tokens consumed by this stream
- * that pass the supplied filter.
- * @see SinkFilter
- */
- public SinkTokenStream newSinkTokenStream(SinkFilter filter) {
- SinkTokenStream sink = new SinkTokenStream(this.cloneAttributes(), filter);
- this.sinks.add(new WeakReference<>(sink));
- return sink;
- }
-
- /**
- * Adds a {@link SinkTokenStream} created by another TeeSinkTokenFilter
- * to this one. The supplied stream will also receive all consumed tokens.
- * This method can be used to pass tokens from two different tees to one sink.
- */
- public void addSinkTokenStream(final SinkTokenStream sink) {
- // check that sink has correct factory
- if (!this.getAttributeFactory().equals(sink.getAttributeFactory())) {
- throw new IllegalArgumentException("The supplied sink is not compatible to this tee");
- }
- // add eventually missing attribute impls to the existing sink
- for (Iterator<AttributeImpl> it = this.cloneAttributes().getAttributeImplsIterator(); it.hasNext(); ) {
- sink.addAttributeImpl(it.next());
- }
- this.sinks.add(new WeakReference<>(sink));
- }
-
- /**
- * <code>TeeSinkTokenFilter</code> passes all tokens to the added sinks
- * when itself is consumed. To be sure, that all tokens from the input
- * stream are passed to the sinks, you can call this methods.
- * This instance is exhausted after this, but all sinks are instant available.
+ * <code>TeeSinkTokenFilter</code> passes all tokens to the added sinks when itself is consumed. To be sure that all
+ * tokens from the input stream are passed to the sinks, you can call this method. This instance is exhausted after
+ * this method returns, but all sinks are instantly available.
*/
public void consumeAllTokens() throws IOException {
while (incrementToken()) {}
}
-
+
@Override
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
- // capture state lazily - maybe no SinkFilter accepts this state
- AttributeSource.State state = null;
- for (WeakReference<SinkTokenStream> ref : sinks) {
- final SinkTokenStream sink = ref.get();
- if (sink != null) {
- if (sink.accept(this)) {
- if (state == null) {
- state = this.captureState();
- }
- sink.addState(state);
- }
- }
- }
+ cachedStates.add(captureState());
return true;
}
-
+
return false;
}
-
+
@Override
public final void end() throws IOException {
super.end();
- AttributeSource.State finalState = captureState();
- for (WeakReference<SinkTokenStream> ref : sinks) {
- final SinkTokenStream sink = ref.get();
- if (sink != null) {
- sink.setFinalState(finalState);
- }
- }
+ cachedStates.setFinalState(captureState());
}
-
- /**
- * A filter that decides which {@link AttributeSource} states to store in the sink.
- */
- public static abstract class SinkFilter {
- /**
- * Returns true, iff the current state of the passed-in {@link AttributeSource} shall be stored
- * in the sink.
- */
- public abstract boolean accept(AttributeSource source);
-
- /**
- * Called by {@link SinkTokenStream#reset()}. This method does nothing by default
- * and can optionally be overridden.
- */
- public void reset() throws IOException {
- // nothing to do; can be overridden
- }
+
+ @Override
+ public void reset() throws IOException {
+ cachedStates.reset();
+ super.reset();
}
-
- /**
- * TokenStream output from a tee with optional filtering.
- */
+
+ /** TokenStream output from a tee. */
public static final class SinkTokenStream extends TokenStream {
- private final List<AttributeSource.State> cachedStates = new LinkedList<>();
- private AttributeSource.State finalState;
+ private final States cachedStates;
private Iterator<AttributeSource.State> it = null;
- private SinkFilter filter;
-
- private SinkTokenStream(AttributeSource source, SinkFilter filter) {
+
+ private SinkTokenStream(AttributeSource source, States cachedStates) {
super(source);
- this.filter = filter;
+ this.cachedStates = cachedStates;
}
-
- private boolean accept(AttributeSource source) {
- return filter.accept(source);
- }
-
- private void addState(AttributeSource.State state) {
- if (it != null) {
- throw new IllegalStateException("The tee must be consumed before sinks are consumed.");
- }
- cachedStates.add(state);
- }
-
- private void setFinalState(AttributeSource.State finalState) {
- this.finalState = finalState;
- }
-
+
@Override
public final boolean incrementToken() {
- // lazy init the iterator
- if (it == null) {
- it = cachedStates.iterator();
- }
-
if (!it.hasNext()) {
return false;
}
-
+
AttributeSource.State state = it.next();
restoreState(state);
return true;
}
-
+
@Override
- public final void end() {
+ public void end() throws IOException {
+ State finalState = cachedStates.getFinalState();
if (finalState != null) {
restoreState(finalState);
}
}
-
+
@Override
public final void reset() {
- it = cachedStates.iterator();
+ it = cachedStates.getStates();
}
}
-
- private static final SinkFilter ACCEPT_ALL_FILTER = new SinkFilter() {
- @Override
- public boolean accept(AttributeSource source) {
- return true;
+
+ /** A convenience wrapper for storing the cached states as well as the final state of the stream. */
+ private static final class States {
+
+ private final List<State> states = new ArrayList<>();
+ private State finalState;
+
+ public States() {}
+
+ void setFinalState(State finalState) {
+ this.finalState = finalState;
}
- };
-
+
+ State getFinalState() {
+ return finalState;
+ }
+
+ void add(State state) {
+ states.add(state);
+ }
+
+ Iterator<State> getStates() {
+ return states.iterator();
+ }
+
+ void reset() {
+ finalState = null;
+ states.clear();
+ }
+ }
+
}
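
  Since all sinks of a tee now share one state cache, the consume-before-iterate contract can be
  shown concretely. A sketch under the reworked API, assuming "tokenizer" is any TokenStream
  (error handling and close() omitted):

      TeeSinkTokenFilter tee = new TeeSinkTokenFilter(tokenizer);
      TokenStream sink = tee.newSinkTokenStream();

      tee.reset();
      tee.consumeAllTokens(); // fills the shared States cache; the tee is exhausted afterwards
      tee.end();

      sink.reset();           // grabs an iterator over the cached states
      while (sink.incrementToken()) {
        // each call restores one captured state into the sink's attributes
      }
      sink.end();             // restores the final state (end offset etc.) captured by the tee
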
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/sinks/TokenRangeSinkFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/sinks/TokenRangeSinkFilter.java
deleted file mode 100644
index 4fd1945afde..00000000000
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/sinks/TokenRangeSinkFilter.java
+++ /dev/null
@@ -1,61 +0,0 @@
-package org.apache.lucene.analysis.sinks;
-
-/*
-* Licensed to the Apache Software Foundation (ASF) under one or more
-* contributor license agreements. See the NOTICE file distributed with
-* this work for additional information regarding copyright ownership.
-* The ASF licenses this file to You under the Apache License, Version 2.0
-* (the "License"); you may not use this file except in compliance with
-* the License. You may obtain a copy of the License at
-*
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*/
-
-import java.io.IOException;
-
-import org.apache.lucene.util.AttributeSource;
-
-/**
- * Counts the tokens as they go by and saves to the internal list those between the range of lower and upper, exclusive of upper
- *
- **/
-public class TokenRangeSinkFilter extends TeeSinkTokenFilter.SinkFilter {
- private int lower;
- private int upper;
- private int count;
-
- public TokenRangeSinkFilter(int lower, int upper) {
- if (lower < 1) {
- throw new IllegalArgumentException("lower must be greater than zero");
- }
- if (lower > upper) {
- throw new IllegalArgumentException("lower must not be greater than upper");
- }
- this.lower = lower;
- this.upper = upper;
- }
-
-
- @Override
- public boolean accept(AttributeSource source) {
- try {
- if (count >= lower && count < upper){
- return true;
- }
- return false;
- } finally {
- count++;
- }
- }
-
- @Override
- public void reset() throws IOException {
- count = 0;
- }
-}
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/sinks/TokenTypeSinkFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/sinks/TokenTypeSinkFilter.java
deleted file mode 100644
index b10bd7841ae..00000000000
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/sinks/TokenTypeSinkFilter.java
+++ /dev/null
@@ -1,44 +0,0 @@
-package org.apache.lucene.analysis.sinks;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
-import org.apache.lucene.util.AttributeSource;
-
-/**
- * Adds a token to the sink if it has a specific type.
- */
-public class TokenTypeSinkFilter extends TeeSinkTokenFilter.SinkFilter {
- private String typeToMatch;
- private TypeAttribute typeAtt;
-
- public TokenTypeSinkFilter(String typeToMatch) {
- this.typeToMatch = typeToMatch;
- }
-
- @Override
- public boolean accept(AttributeSource source) {
- if (typeAtt == null) {
- typeAtt = source.addAttribute(TypeAttribute.class);
- }
-
- //check to see if this is a Category
- return (typeToMatch.equals(typeAtt.type()));
- }
-
-}
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/sinks/package-info.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/sinks/package-info.java
index c8d5e3af6da..964e5f43aaa 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/sinks/package-info.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/sinks/package-info.java
@@ -16,8 +16,6 @@
*/
/**
- * {@link org.apache.lucene.analysis.sinks.TeeSinkTokenFilter} and implementations
- * of {@link org.apache.lucene.analysis.sinks.TeeSinkTokenFilter.SinkFilter} that
- * might be useful.
+ * {@link org.apache.lucene.analysis.sinks.TeeSinkTokenFilter}.
*/
package org.apache.lucene.analysis.sinks;
diff --git a/lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory b/lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory
index 5b3f5787671..8b19cdfc45f 100644
--- a/lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory
+++ b/lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory
@@ -61,6 +61,7 @@ org.apache.lucene.analysis.lv.LatvianStemFilterFactory
org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilterFactory
org.apache.lucene.analysis.miscellaneous.CapitalizationFilterFactory
org.apache.lucene.analysis.miscellaneous.CodepointCountFilterFactory
+org.apache.lucene.analysis.miscellaneous.DateRecognizerFilterFactory
org.apache.lucene.analysis.miscellaneous.FingerprintFilterFactory
org.apache.lucene.analysis.miscellaneous.HyphenatedWordsFilterFactory
org.apache.lucene.analysis.miscellaneous.KeepWordFilterFactory
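
  With the SPI entry above, the factory becomes loadable by name; assuming the usual convention of
  stripping the FilterFactory suffix and lowercasing the first letter, the lookup name would be
  "dateRecognizer":

      Map<String, String> args = new HashMap<>();
      args.put("datePattern", "MM/dd/yyyy");
      TokenFilterFactory factory = TokenFilterFactory.forName("dateRecognizer", args);
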
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java
index bd5dac5af43..d218d891abd 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java
@@ -31,6 +31,7 @@ import java.nio.file.DirectoryStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
+import java.text.DateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
@@ -41,6 +42,7 @@ import java.util.HashMap;
import java.util.HashSet;
import java.util.IdentityHashMap;
import java.util.List;
+import java.util.Locale;
import java.util.Map;
import java.util.Random;
import java.util.Set;
@@ -574,6 +576,13 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
}
}
});
+ put(DateFormat.class, new ArgProducer() {
+ @Override
+ public Object create(Random random) {
+ if (random.nextBoolean()) return null;
+ return DateFormat.getDateInstance(DateFormat.DEFAULT, randomLocale(random));
+ }
+ });
}};
static final Set<Class<?>> allowedTokenizerArgs, allowedTokenFilterArgs, allowedCharFilterArgs;
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/DateRecognizerFilterFactoryTest.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/DateRecognizerFilterFactoryTest.java
new file mode 100755
index 00000000000..241a0e5cab4
--- /dev/null
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/DateRecognizerFilterFactoryTest.java
@@ -0,0 +1,43 @@
+package org.apache.lucene.analysis.miscellaneous;
+
+/*
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+
+public class DateRecognizerFilterFactoryTest extends BaseTokenStreamTestCase {
+
+ public void testBadLanguageTagThrowsException() {
+ try {
+ final Map<String, String> args = new HashMap<>();
+ args.put(DateRecognizerFilterFactory.LOCALE, "en_US");
+ new DateRecognizerFilterFactory(args);
+ fail("Bad language tag should have thrown an exception");
+ } catch (Exception e) {
+ // expected;
+ }
+ }
+
+ public void testGoodLocaleParsesWell() {
+ final Map<String, String> args = new HashMap<>();
+ args.put(DateRecognizerFilterFactory.LOCALE, "en-US");
+ new DateRecognizerFilterFactory(args);
+ }
+
+}
\ No newline at end of file
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/DateRecognizerFilterTest.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/DateRecognizerFilterTest.java
new file mode 100644
index 00000000000..6ae81e27d48
--- /dev/null
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/DateRecognizerFilterTest.java
@@ -0,0 +1,37 @@
+package org.apache.lucene.analysis.miscellaneous;
+
+/*
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.text.DateFormat;
+import java.text.SimpleDateFormat;
+import java.util.Locale;
+
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.TokenStream;
+
+public class DateRecognizerFilterTest extends BaseTokenStreamTestCase {
+
+ public void test() throws IOException {
+ final String test = "The red fox jumped over the lazy dogs on 7/11/2006 The dogs finally reacted on 7/12/2006";
+ final DateFormat dateFormat = new SimpleDateFormat("MM/dd/yyyy", Locale.ENGLISH);
+ try (TokenStream ts = new DateRecognizerFilter(whitespaceMockTokenizer(test), dateFormat)) {
+ assertStreamHasNumberOfTokens(ts, 2);
+ }
+ }
+
+}
\ No newline at end of file
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/sinks/DateRecognizerSinkTokenizerTest.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/sinks/DateRecognizerSinkTokenizerTest.java
deleted file mode 100644
index 39cca608445..00000000000
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/sinks/DateRecognizerSinkTokenizerTest.java
+++ /dev/null
@@ -1,52 +0,0 @@
-package org.apache.lucene.analysis.sinks;
-
-/*
- * Copyright 2004 The Apache Software Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-import java.io.StringReader;
-import java.text.SimpleDateFormat;
-import java.util.Locale;
-
-import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.MockTokenizer;
-
-public class DateRecognizerSinkTokenizerTest extends BaseTokenStreamTestCase {
-
- public void test() throws IOException {
- DateRecognizerSinkFilter sinkFilter = new DateRecognizerSinkFilter(new SimpleDateFormat("MM/dd/yyyy", Locale.ROOT));
- String test = "The quick red fox jumped over the lazy brown dogs on 7/11/2006 The dogs finally reacted on 7/12/2006";
- final MockTokenizer input = new MockTokenizer(MockTokenizer.WHITESPACE, false);
- input.setReader(new StringReader(test));
- TeeSinkTokenFilter tee = new TeeSinkTokenFilter(input);
- TeeSinkTokenFilter.SinkTokenStream sink = tee.newSinkTokenStream(sinkFilter);
- int count = 0;
-
- tee.reset();
- while (tee.incrementToken()) {
- count++;
- }
- assertTrue(count + " does not equal: " + 18, count == 18);
-
- int sinkCount = 0;
- sink.reset();
- while (sink.incrementToken()) {
- sinkCount++;
- }
- assertTrue("sink Size: " + sinkCount + " is not: " + 2, sinkCount == 2);
-
- }
-}
\ No newline at end of file
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/sinks/TestTeeSinkTokenFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/sinks/TestTeeSinkTokenFilter.java
index a0e2636e2b6..4b2b8c4dfc8 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/sinks/TestTeeSinkTokenFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/sinks/TestTeeSinkTokenFilter.java
@@ -20,28 +20,33 @@ import java.io.IOException;
import java.io.StringReader;
import java.util.Locale;
-import org.apache.lucene.analysis.*;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CachingTokenFilter;
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.analysis.util.FilteringTokenFilter;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
-import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.store.Directory;
-import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.English;
-
/**
* tests for the TestTeeSinkTokenFilter
*/
@@ -67,22 +72,6 @@ public class TestTeeSinkTokenFilter extends BaseTokenStreamTestCase {
}
}
- static final TeeSinkTokenFilter.SinkFilter theFilter = new TeeSinkTokenFilter.SinkFilter() {
- @Override
- public boolean accept(AttributeSource a) {
- CharTermAttribute termAtt = a.getAttribute(CharTermAttribute.class);
- return termAtt.toString().equalsIgnoreCase("The");
- }
- };
-
- static final TeeSinkTokenFilter.SinkFilter dogFilter = new TeeSinkTokenFilter.SinkFilter() {
- @Override
- public boolean accept(AttributeSource a) {
- CharTermAttribute termAtt = a.getAttribute(CharTermAttribute.class);
- return termAtt.toString().equalsIgnoreCase("Dogs");
- }
- };
-
// LUCENE-1448
// TODO: instead of testing it this way, we can test
// with BaseTokenStreamTestCase now...
@@ -128,41 +117,29 @@ public class TestTeeSinkTokenFilter extends BaseTokenStreamTestCase {
public void testGeneral() throws IOException {
final TeeSinkTokenFilter source = new TeeSinkTokenFilter(whitespaceMockTokenizer(buffer1.toString()));
- final TokenStream sink1 = source.newSinkTokenStream();
- final TokenStream sink2 = source.newSinkTokenStream(theFilter);
+ final TokenStream sink = source.newSinkTokenStream();
source.addAttribute(CheckClearAttributesAttribute.class);
- sink1.addAttribute(CheckClearAttributesAttribute.class);
- sink2.addAttribute(CheckClearAttributesAttribute.class);
+ sink.addAttribute(CheckClearAttributesAttribute.class);
assertTokenStreamContents(source, tokens1);
- assertTokenStreamContents(sink1, tokens1);
- assertTokenStreamContents(sink2, new String[]{"The", "the"});
+ assertTokenStreamContents(sink, tokens1);
}
public void testMultipleSources() throws Exception {
final TeeSinkTokenFilter tee1 = new TeeSinkTokenFilter(whitespaceMockTokenizer(buffer1.toString()));
- final TeeSinkTokenFilter.SinkTokenStream dogDetector = tee1.newSinkTokenStream(dogFilter);
- final TeeSinkTokenFilter.SinkTokenStream theDetector = tee1.newSinkTokenStream(theFilter);
final TokenStream source1 = new CachingTokenFilter(tee1);
tee1.addAttribute(CheckClearAttributesAttribute.class);
- dogDetector.addAttribute(CheckClearAttributesAttribute.class);
- theDetector.addAttribute(CheckClearAttributesAttribute.class);
MockTokenizer tokenizer = new MockTokenizer(tee1.getAttributeFactory(), MockTokenizer.WHITESPACE, false);
tokenizer.setReader(new StringReader(buffer2.toString()));
final TeeSinkTokenFilter tee2 = new TeeSinkTokenFilter(tokenizer);
- tee2.addSinkTokenStream(dogDetector);
- tee2.addSinkTokenStream(theDetector);
final TokenStream source2 = tee2;
assertTokenStreamContents(source1, tokens1);
assertTokenStreamContents(source2, tokens2);
- assertTokenStreamContents(theDetector, new String[]{"The", "the", "The", "the"});
- assertTokenStreamContents(dogDetector, new String[]{"Dogs", "Dogs"});
-
TokenStream lowerCasing = new LowerCaseFilter(source1);
String[] lowerCaseTokens = new String[tokens1.length];
for (int i = 0; i < tokens1.length; i++)
@@ -170,7 +147,7 @@ public class TestTeeSinkTokenFilter extends BaseTokenStreamTestCase {
assertTokenStreamContents(lowerCasing, lowerCaseTokens);
}
- private StandardTokenizer standardTokenizer(StringBuilder builder) throws IOException {
+ private StandardTokenizer standardTokenizer(StringBuilder builder) {
StandardTokenizer tokenizer = new StandardTokenizer();
tokenizer.setReader(new StringReader(builder.toString()));
return tokenizer;
@@ -179,6 +156,7 @@ public class TestTeeSinkTokenFilter extends BaseTokenStreamTestCase {
/**
* Not an explicit test, just useful to print out some info on performance
*/
+ @SuppressWarnings("resource")
public void performance() throws Exception {
int[] tokCount = {100, 500, 1000, 2000, 5000, 10000};
int[] modCounts = {1, 2, 5, 10, 20, 50, 100, 200, 500};
@@ -190,7 +168,7 @@ public class TestTeeSinkTokenFilter extends BaseTokenStreamTestCase {
}
//make sure we produce the same tokens
TeeSinkTokenFilter teeStream = new TeeSinkTokenFilter(new StandardFilter(standardTokenizer(buffer)));
- TokenStream sink = teeStream.newSinkTokenStream(new ModuloSinkFilter(100));
+ TokenStream sink = new ModuloTokenFilter(teeStream.newSinkTokenStream(), 100);
teeStream.consumeAllTokens();
TokenStream stream = new ModuloTokenFilter(new StandardFilter(standardTokenizer(buffer)), 100);
CharTermAttribute tfTok = stream.addAttribute(CharTermAttribute.class);
@@ -223,7 +201,7 @@ public class TestTeeSinkTokenFilter extends BaseTokenStreamTestCase {
start = System.currentTimeMillis();
for (int i = 0; i < 20; i++) {
teeStream = new TeeSinkTokenFilter(new StandardFilter( standardTokenizer(buffer)));
- sink = teeStream.newSinkTokenStream(new ModuloSinkFilter(modCounts[j]));
+ sink = new ModuloTokenFilter(teeStream.newSinkTokenStream(), modCounts[j]);
PositionIncrementAttribute posIncrAtt = teeStream.getAttribute(PositionIncrementAttribute.class);
while (teeStream.incrementToken()) {
sinkPos += posIncrAtt.getPositionIncrement();
@@ -270,17 +248,18 @@ public class TestTeeSinkTokenFilter extends BaseTokenStreamTestCase {
}
}
- class ModuloSinkFilter extends TeeSinkTokenFilter.SinkFilter {
+ class ModuloSinkFilter extends FilteringTokenFilter {
int count = 0;
int modCount;
- ModuloSinkFilter(int mc) {
+ ModuloSinkFilter(TokenStream input, int mc) {
+ super(input);
modCount = mc;
}
@Override
- public boolean accept(AttributeSource a) {
- boolean b = (a != null && count % modCount == 0);
+ protected boolean accept() throws IOException {
+ boolean b = count % modCount == 0;
count++;
return b;
}
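
  For comparison, the removed term-matching SinkFilters (theFilter/dogFilter above) translate to
  the new style the same way; a sketch:

      // Equivalent of the removed "theFilter": keep only tokens whose term is "The".
      final class TheFilter extends FilteringTokenFilter {
        private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);

        TheFilter(TokenStream input) {
          super(input);
        }

        @Override
        protected boolean accept() throws IOException {
          return termAtt.toString().equalsIgnoreCase("The");
        }
      }

      // Usage: TokenStream theDetector = new TheFilter(tee.newSinkTokenStream());
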
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/sinks/TokenRangeSinkTokenizerTest.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/sinks/TokenRangeSinkTokenizerTest.java
deleted file mode 100644
index b1acecd8a99..00000000000
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/sinks/TokenRangeSinkTokenizerTest.java
+++ /dev/null
@@ -1,55 +0,0 @@
-package org.apache.lucene.analysis.sinks;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-import java.io.StringReader;
-
-import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.MockTokenizer;
-import org.junit.Test;
-
-public class TokenRangeSinkTokenizerTest extends BaseTokenStreamTestCase {
-
- public void test() throws IOException {
- TokenRangeSinkFilter sinkFilter = new TokenRangeSinkFilter(2, 4);
- String test = "The quick red fox jumped over the lazy brown dogs";
- TeeSinkTokenFilter tee = new TeeSinkTokenFilter(whitespaceMockTokenizer(test));
- TeeSinkTokenFilter.SinkTokenStream rangeToks = tee.newSinkTokenStream(sinkFilter);
-
- int count = 0;
- tee.reset();
- while(tee.incrementToken()) {
- count++;
- }
-
- int sinkCount = 0;
- rangeToks.reset();
- while (rangeToks.incrementToken()) {
- sinkCount++;
- }
-
- assertTrue(count + " does not equal: " + 10, count == 10);
- assertTrue("rangeToks Size: " + sinkCount + " is not: " + 2, sinkCount == 2);
- }
-
- @Test(expected = IllegalArgumentException.class)
- public void testIllegalArguments() throws Exception {
- new TokenRangeSinkFilter(4, 2);
- }
-}
\ No newline at end of file
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/sinks/TokenTypeSinkTokenizerTest.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/sinks/TokenTypeSinkTokenizerTest.java
deleted file mode 100644
index d03ee35eef2..00000000000
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/sinks/TokenTypeSinkTokenizerTest.java
+++ /dev/null
@@ -1,78 +0,0 @@
-package org.apache.lucene.analysis.sinks;
-
-/*
- * Copyright 2004 The Apache Software Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-
-import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.TokenFilter;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
-
-public class TokenTypeSinkTokenizerTest extends BaseTokenStreamTestCase {
-
- public void test() throws IOException {
- TokenTypeSinkFilter sinkFilter = new TokenTypeSinkFilter("D");
- String test = "The quick red fox jumped over the lazy brown dogs";
-
- TeeSinkTokenFilter ttf = new TeeSinkTokenFilter(new WordTokenFilter(whitespaceMockTokenizer(test)));
- TeeSinkTokenFilter.SinkTokenStream sink = ttf.newSinkTokenStream(sinkFilter);
-
- boolean seenDogs = false;
-
- CharTermAttribute termAtt = ttf.addAttribute(CharTermAttribute.class);
- TypeAttribute typeAtt = ttf.addAttribute(TypeAttribute.class);
- ttf.reset();
- while (ttf.incrementToken()) {
- if (termAtt.toString().equals("dogs")) {
- seenDogs = true;
- assertTrue(typeAtt.type() + " is not equal to " + "D", typeAtt.type().equals("D") == true);
- } else {
- assertTrue(typeAtt.type() + " is not null and it should be", typeAtt.type().equals("word"));
- }
- }
- assertTrue(seenDogs + " does not equal: " + true, seenDogs == true);
-
- int sinkCount = 0;
- sink.reset();
- while (sink.incrementToken()) {
- sinkCount++;
- }
-
- assertTrue("sink Size: " + sinkCount + " is not: " + 1, sinkCount == 1);
- }
-
- private class WordTokenFilter extends TokenFilter {
- private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
- private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
-
- private WordTokenFilter(TokenStream input) {
- super(input);
- }
-
- @Override
- public final boolean incrementToken() throws IOException {
- if (!input.incrementToken()) return false;
-
- if (termAtt.toString().equals("dogs")) {
- typeAtt.setType("D");
- }
- return true;
- }
- }
-}
\ No newline at end of file
diff --git a/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
index 9afd785f884..183b900446a 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
@@ -447,6 +447,17 @@ public abstract class BaseTokenStreamTestCase extends LuceneTestCase {
checkRandomData(random, a, iterations, 20, simple, true);
}
+ /** Asserts that the given stream has the expected number of tokens. */
+ public static void assertStreamHasNumberOfTokens(TokenStream ts, int expectedCount) throws IOException {
+ ts.reset();
+ int count = 0;
+ while (ts.incrementToken()) {
+ count++;
+ }
+ ts.end();
+ assertEquals("wrong number of tokens", expectedCount, count);
+ }
+
static class AnalysisThread extends Thread {
final int iterations;
final int maxWordLength;