diff --git a/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilter.java b/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilter.java
new file mode 100644
index 00000000000..13bbc618254
--- /dev/null
+++ b/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilter.java
@@ -0,0 +1,85 @@
+package org.apache.lucene.analysis.payloads;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.index.Payload;
+
+import java.io.IOException;
+
+
+/**
+ * Assigns a payload to each token whose {@link org.apache.lucene.analysis.Token#type()} matches the type supplied to the
+ * constructor; the payload is the float value encoded by {@link #encodePayload(float)}.
+ *
+ **/
+public class NumericPayloadTokenFilter extends TokenFilter {
+
+ private String typeMatch;
+ private Payload thePayload;
+
+ public NumericPayloadTokenFilter(TokenStream input, float payload, String typeMatch) {
+ super(input);
+ //Need to encode the payload
+ thePayload = new Payload(encodePayload(payload));
+ this.typeMatch = typeMatch;
+ }
+
+ public static byte[] encodePayload(float payload) {
+ byte[] result = new byte[4];
+ int tmp = Float.floatToIntBits(payload);
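+    //Write the raw IEEE 754 bits out big-endian (most significant byte first) so decodePayload can reassemble them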
+ result[0] = (byte)(tmp >> 24);
+ result[1] = (byte)(tmp >> 16);
+ result[2] = (byte)(tmp >> 8);
+ result[3] = (byte) tmp;
+
+ return result;
+ }
+
+ /**
+ * @see #decodePayload(byte[], int)
+ * @see #encodePayload(float)
+ */
+ public static float decodePayload(byte [] bytes){
+ return decodePayload(bytes, 0);
+ }
+
+ /**
+ * Decode the payload that was encoded using {@link #encodePayload(float)}.
+   * NOTE: the array must be at least offset + 4 bytes long.
+ * @param bytes The bytes to decode
+ * @param offset The offset into the array.
+ * @return The float that was encoded
+ *
+ * @see #encodePayload(float)
+ */
+ public static final float decodePayload(byte [] bytes, int offset){
+ int tmp = ((bytes[offset] & 0xFF) << 24) | ((bytes[offset + 1] & 0xFF) << 16)
+ | ((bytes[offset + 2] & 0xFF) << 8) | (bytes[offset + 3] & 0xFF);
+ return Float.intBitsToFloat(tmp);
+ }
+
+ public Token next(Token result) throws IOException {
+ result = input.next(result);
+ if (result != null && result.type().equals(typeMatch)){
+ result.setPayload(thePayload);
+ }
+ return result;
+ }
+}
diff --git a/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilter.java b/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilter.java
new file mode 100644
index 00000000000..d4dbae13332
--- /dev/null
+++ b/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilter.java
@@ -0,0 +1,49 @@
+package org.apache.lucene.analysis.payloads;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.index.Payload;
+
+import java.io.IOException;
+
+
+/**
+ * Makes the {@link org.apache.lucene.analysis.Token#type()} a payload.
+ *
+ * Encodes the type using {@link String#getBytes(String)} with "UTF-8" as the encoding
+ *
+ **/
+public class TypeAsPayloadTokenFilter extends TokenFilter {
+
+ public TypeAsPayloadTokenFilter(TokenStream input) {
+ super(input);
+
+ }
+
+
+ public Token next(Token result) throws IOException {
+ result = input.next(result);
+    if (result != null && result.type() != null && result.type().length() > 0){
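+      //The payload is just the UTF-8 bytes of the type; new String(payload.getData(), "UTF-8") recovers it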
+ result.setPayload(new Payload(result.type().getBytes("UTF-8")));
+ }
+ return result;
+ }
+}
\ No newline at end of file
diff --git a/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/package.html b/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/package.html
new file mode 100644
index 00000000000..abababcc93f
--- /dev/null
+++ b/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/package.html
@@ -0,0 +1,31 @@
+<html>
+<head>
+   <title>org.apache.lucene.analysis.payloads</title>
+</head>
+<body>
+Provides various convenience classes for creating payloads on Tokens.
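+<p>
+A minimal sketch of how these filters fit into an analysis chain (a fragment only; it assumes the core
+WhitespaceTokenizer, whose tokens carry the default type "word", and an arbitrary payload value of 3.0):
+</p>
+<pre>
+TokenStream stream = new NumericPayloadTokenFilter(
+    new WhitespaceTokenizer(new StringReader("the quick brown fox")), 3.0f, "word");
+Token tok = new Token();
+while ((tok = stream.next(tok)) != null) {
+  // every token matched the type "word", so each one now carries the encoded float
+  float value = NumericPayloadTokenFilter.decodePayload(tok.getPayload().getData()); // 3.0f
+}
+</pre>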
+</body>
+</html>
\ No newline at end of file
diff --git a/contrib/analyzers/src/java/org/apache/lucene/analysis/sinks/DateRecognizerSinkTokenizer.java b/contrib/analyzers/src/java/org/apache/lucene/analysis/sinks/DateRecognizerSinkTokenizer.java
new file mode 100644
index 00000000000..77f0f86ac29
--- /dev/null
+++ b/contrib/analyzers/src/java/org/apache/lucene/analysis/sinks/DateRecognizerSinkTokenizer.java
@@ -0,0 +1,87 @@
+package org.apache.lucene.analysis.sinks;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.SinkTokenizer;
+import org.apache.lucene.analysis.Token;
+
+import java.text.DateFormat;
+import java.text.SimpleDateFormat;
+import java.text.ParseException;
+import java.util.List;
+import java.util.Date;
+
+
+/**
+ * Attempts to parse the {@link org.apache.lucene.analysis.Token#termBuffer()} as a Date using a {@link java.text.DateFormat}.
+ * If the term can be parsed as a Date, the token is added to the sink.
+ *
+ * Also marks the sink token with {@link org.apache.lucene.analysis.Token#type()} equal to {@link #DATE_TYPE}
+ *
+ *
+ **/
+public class DateRecognizerSinkTokenizer extends SinkTokenizer {
+ public static final String DATE_TYPE = "date";
+
+ protected DateFormat dateFormat;
+
+ /**
+ * Uses {@link java.text.SimpleDateFormat#getDateInstance()} as the {@link java.text.DateFormat} object.
+ */
+ public DateRecognizerSinkTokenizer() {
+ this(null, SimpleDateFormat.getDateInstance());
+ }
+
+ public DateRecognizerSinkTokenizer(DateFormat dateFormat) {
+ this(null, dateFormat);
+ }
+
+ /**
+ * Uses {@link java.text.SimpleDateFormat#getDateInstance()} as the {@link java.text.DateFormat} object.
+   * @param input The input list of Tokens that are already dates. They should be marked as type {@link #DATE_TYPE} for consistency
+   */
+  public DateRecognizerSinkTokenizer(List/*<Token>*/ input) {
+ this(input, SimpleDateFormat.getDateInstance());
+ }
+
+ /**
+ *
+   * @param input The input list of Tokens that are already dates
+   * @param dateFormat The date format to use when trying to parse a token as a date. Note, this SinkTokenizer makes no attempt to synchronize the DateFormat object
+   */
+  public DateRecognizerSinkTokenizer(List/*<Token>*/ input, DateFormat dateFormat) {
+ super(input);
+ this.dateFormat = dateFormat;
+ }
+
+
+ public void add(Token t) {
+ //Check to see if this token is a date
+ if (t != null) {
+ try {
+ Date date = dateFormat.parse(new String(t.termBuffer(), 0, t.termLength()));//We don't care about the date, just that we can parse it as a date
+ if (date != null) {
+ t.setType(DATE_TYPE);
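+          //The Token may be reused by the producing stream, so store a copy rather than the original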
+ lst.add(t.clone());
+ }
+ } catch (ParseException e) {
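+        //The term is not a date, so it does not belong in the sink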
+
+ }
+ }
+
+ }
+}
diff --git a/contrib/analyzers/src/java/org/apache/lucene/analysis/sinks/TokenRangeSinkTokenizer.java b/contrib/analyzers/src/java/org/apache/lucene/analysis/sinks/TokenRangeSinkTokenizer.java
new file mode 100644
index 00000000000..533e9d1577b
--- /dev/null
+++ b/contrib/analyzers/src/java/org/apache/lucene/analysis/sinks/TokenRangeSinkTokenizer.java
@@ -0,0 +1,55 @@
+package org.apache.lucene.analysis.sinks;
+
+import org.apache.lucene.analysis.SinkTokenizer;
+import org.apache.lucene.analysis.Token;
+
+import java.io.IOException;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+/**
+ * Counts the tokens as they go by and saves to the internal list those whose position falls in the range [lower, upper),
+ * that is, inclusive of lower and exclusive of upper. For example, new TokenRangeSinkTokenizer(2, 4) keeps the third and fourth tokens.
+ *
+ **/
+public class TokenRangeSinkTokenizer extends SinkTokenizer {
+ private int lower;
+ private int upper;
+ private int count;
+
+ public TokenRangeSinkTokenizer(int lower, int upper) {
+ this.lower = lower;
+ this.upper = upper;
+ }
+
+ public TokenRangeSinkTokenizer(int initCap, int lower, int upper) {
+ super(initCap);
+ this.lower = lower;
+ this.upper = upper;
+ }
+
+ public void add(Token t) {
+ if (count >= lower && count < upper){
+ super.add(t);
+ }
+ count++;
+ }
+
+ public void reset() throws IOException {
+ count = 0;
+ }
+}
diff --git a/contrib/analyzers/src/java/org/apache/lucene/analysis/sinks/TokenTypeSinkTokenizer.java b/contrib/analyzers/src/java/org/apache/lucene/analysis/sinks/TokenTypeSinkTokenizer.java
new file mode 100644
index 00000000000..f814555035c
--- /dev/null
+++ b/contrib/analyzers/src/java/org/apache/lucene/analysis/sinks/TokenTypeSinkTokenizer.java
@@ -0,0 +1,54 @@
+package org.apache.lucene.analysis.sinks;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.SinkTokenizer;
+import org.apache.lucene.analysis.Token;
+
+import java.util.List;
+
+
+/**
+ * If the {@link org.apache.lucene.analysis.Token#type()} matches the passed in typeToMatch, then
+ * add the token to the sink.
+ *
+ **/
+public class TokenTypeSinkTokenizer extends SinkTokenizer {
+
+ private String typeToMatch;
+
+ public TokenTypeSinkTokenizer(String typeToMatch) {
+ this.typeToMatch = typeToMatch;
+ }
+
+ public TokenTypeSinkTokenizer(int initCap, String typeToMatch) {
+ super(initCap);
+ this.typeToMatch = typeToMatch;
+ }
+
+  public TokenTypeSinkTokenizer(List/*<Token>*/ input, String typeToMatch) {
+ super(input);
+ this.typeToMatch = typeToMatch;
+ }
+
+ public void add(Token t) {
+    //check to see if this token's type matches the one we are looking for
+ if (t != null && typeToMatch.equals(t.type())){
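+      //Tokens are reused by the producing stream, so add a clone to the sink's list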
+ lst.add(t.clone());
+ }
+ }
+}
diff --git a/contrib/analyzers/src/java/org/apache/lucene/analysis/sinks/package.html b/contrib/analyzers/src/java/org/apache/lucene/analysis/sinks/package.html
new file mode 100644
index 00000000000..f60cd21d441
--- /dev/null
+++ b/contrib/analyzers/src/java/org/apache/lucene/analysis/sinks/package.html
@@ -0,0 +1,30 @@
+<html>
+<head>
+   <title>org.apache.lucene.analysis.sinks</title>
+</head>
+<body>
+Implementations of the SinkTokenizer that might be useful.
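+<p>
+A minimal sketch (a fragment only; it assumes the core TeeTokenFilter and WhitespaceTokenizer, and an
+arbitrary sample sentence and date format) of siphoning date tokens into their own sink while the main
+stream is consumed as usual:
+</p>
+<pre>
+DateRecognizerSinkTokenizer dates =
+    new DateRecognizerSinkTokenizer(new SimpleDateFormat("MM/dd/yyyy"));
+TokenStream stream =
+    new TeeTokenFilter(new WhitespaceTokenizer(new StringReader("released on 7/11/2006")), dates);
+while (stream.next() != null) {
+  // consume the main stream; any token whose text parses as a date is also copied into the sink
+}
+List dateTokens = dates.getTokens(); // holds the token for "7/11/2006", marked with type "date"
+</pre>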
+</body>
+</html>
\ No newline at end of file
diff --git a/contrib/analyzers/src/test/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilterTest.java b/contrib/analyzers/src/test/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilterTest.java
new file mode 100644
index 00000000000..edfbb72abd0
--- /dev/null
+++ b/contrib/analyzers/src/test/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilterTest.java
@@ -0,0 +1,79 @@
+package org.apache.lucene.analysis.payloads;
+
+/**
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import junit.framework.TestCase;
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.WhitespaceTokenizer;
+
+import java.io.IOException;
+import java.io.StringReader;
+
+public class NumericPayloadTokenFilterTest extends TestCase {
+
+
+ public NumericPayloadTokenFilterTest(String s) {
+ super(s);
+ }
+
+ protected void setUp() {
+ }
+
+ protected void tearDown() {
+
+ }
+
+ public void test() throws IOException {
+ String test = "The quick red fox jumped over the lazy brown dogs";
+
+ NumericPayloadTokenFilter nptf = new NumericPayloadTokenFilter(new WordTokenFilter(new WhitespaceTokenizer(new StringReader(test))), 3, "D");
+ Token tok = new Token();
+ boolean seenDogs = false;
+ while ((tok = nptf.next(tok)) != null){
+ if (tok.termText().equals("dogs")){
+ seenDogs = true;
+ assertTrue(tok.type() + " is not equal to " + "D", tok.type().equals("D") == true);
+ assertTrue("tok.getPayload() is null and it shouldn't be", tok.getPayload() != null);
+ byte [] bytes = tok.getPayload().getData();//safe here to just use the bytes, otherwise we should use offset, length
+ assertTrue(bytes.length + " does not equal: " + tok.getPayload().length(), bytes.length == tok.getPayload().length());
+ assertTrue(tok.getPayload().getOffset() + " does not equal: " + 0, tok.getPayload().getOffset() == 0);
+ float pay = NumericPayloadTokenFilter.decodePayload(bytes);
+ assertTrue(pay + " does not equal: " + 3, pay == 3);
+ } else {
+ assertTrue(tok.type() + " is not null and it should be", tok.type().equals("word"));
+ }
+ }
+ assertTrue(seenDogs + " does not equal: " + true, seenDogs == true);
+ }
+
+ private class WordTokenFilter extends TokenFilter {
+ private WordTokenFilter(TokenStream input) {
+ super(input);
+ }
+
+ public Token next(Token result) throws IOException {
+ result = input.next(result);
+ if (result != null && result.termText().equals("dogs")) {
+ result.setType("D");
+ }
+ return result;
+ }
+ }
+
+}
\ No newline at end of file
diff --git a/contrib/analyzers/src/test/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilterTest.java b/contrib/analyzers/src/test/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilterTest.java
new file mode 100644
index 00000000000..03f964d30cd
--- /dev/null
+++ b/contrib/analyzers/src/test/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilterTest.java
@@ -0,0 +1,76 @@
+package org.apache.lucene.analysis.payloads;
+
+/**
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import junit.framework.TestCase;
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.WhitespaceTokenizer;
+
+import java.io.IOException;
+import java.io.StringReader;
+
+public class TypeAsPayloadTokenFilterTest extends TestCase {
+
+
+ public TypeAsPayloadTokenFilterTest(String s) {
+ super(s);
+ }
+
+ protected void setUp() {
+ }
+
+ protected void tearDown() {
+
+ }
+
+
+ public void test() throws IOException {
+ String test = "The quick red fox jumped over the lazy brown dogs";
+
+ TypeAsPayloadTokenFilter nptf = new TypeAsPayloadTokenFilter(new WordTokenFilter(new WhitespaceTokenizer(new StringReader(test))));
+ Token tok = new Token();
+ int count = 0;
+ while ((tok = nptf.next(tok)) != null){
+ assertTrue(tok.type() + " is not null and it should be", tok.type().equals(String.valueOf(Character.toUpperCase(tok.termBuffer()[0]))));
+ assertTrue("tok.getPayload() is null and it shouldn't be", tok.getPayload() != null);
+ String type = new String(tok.getPayload().getData(), "UTF-8");
+ assertTrue("type is null and it shouldn't be", type != null);
+ assertTrue(type + " is not equal to " + tok.type(), type.equals(tok.type()) == true);
+ count++;
+ }
+ assertTrue(count + " does not equal: " + 10, count == 10);
+ }
+
+ private class WordTokenFilter extends TokenFilter {
+ private WordTokenFilter(TokenStream input) {
+ super(input);
+ }
+
+
+
+ public Token next(Token result) throws IOException {
+ result = input.next(result);
+ if (result != null) {
+ result.setType(String.valueOf(Character.toUpperCase(result.termBuffer()[0])));
+ }
+ return result;
+ }
+ }
+
+}
\ No newline at end of file
diff --git a/contrib/analyzers/src/test/org/apache/lucene/analysis/sinks/DateRecognizerSinkTokenizerTest.java b/contrib/analyzers/src/test/org/apache/lucene/analysis/sinks/DateRecognizerSinkTokenizerTest.java
new file mode 100644
index 00000000000..64868747c57
--- /dev/null
+++ b/contrib/analyzers/src/test/org/apache/lucene/analysis/sinks/DateRecognizerSinkTokenizerTest.java
@@ -0,0 +1,60 @@
+package org.apache.lucene.analysis.sinks;
+
+/**
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import junit.framework.TestCase;
+import org.apache.lucene.analysis.TeeTokenFilter;
+import org.apache.lucene.analysis.WhitespaceTokenizer;
+import org.apache.lucene.analysis.Token;
+
+import java.io.StringReader;
+import java.io.IOException;
+import java.text.SimpleDateFormat;
+
+public class DateRecognizerSinkTokenizerTest extends TestCase {
+
+
+ public DateRecognizerSinkTokenizerTest(String s) {
+ super(s);
+ }
+
+ protected void setUp() {
+ }
+
+ protected void tearDown() {
+
+ }
+
+ public void test() throws IOException {
+ DateRecognizerSinkTokenizer sink = new DateRecognizerSinkTokenizer(new SimpleDateFormat("MM/dd/yyyy"));
+ String test = "The quick red fox jumped over the lazy brown dogs on 7/11/2006 The dogs finally reacted on 7/12/2006";
+ TeeTokenFilter tee = new TeeTokenFilter(new WhitespaceTokenizer(new StringReader(test)), sink);
+ Token tok = null;
+ int count = 0;
+ while ((tok = tee.next()) != null){
+ assertTrue("tok is null and it shouldn't be", tok != null);
+ if (tok.termBuffer()[0] == '7'){
+ assertTrue(tok.type() + " is not equal to " + DateRecognizerSinkTokenizer.DATE_TYPE,
+ tok.type().equals(DateRecognizerSinkTokenizer.DATE_TYPE) == true);
+ }
+ count++;
+ }
+ assertTrue(count + " does not equal: " + 18, count == 18);
+ assertTrue("sink Size: " + sink.getTokens().size() + " is not: " + 2, sink.getTokens().size() == 2);
+
+ }
+}
\ No newline at end of file
diff --git a/contrib/analyzers/src/test/org/apache/lucene/analysis/sinks/TokenRangeSinkTokenizerTest.java b/contrib/analyzers/src/test/org/apache/lucene/analysis/sinks/TokenRangeSinkTokenizerTest.java
new file mode 100644
index 00000000000..71e849301fb
--- /dev/null
+++ b/contrib/analyzers/src/test/org/apache/lucene/analysis/sinks/TokenRangeSinkTokenizerTest.java
@@ -0,0 +1,54 @@
+package org.apache.lucene.analysis.sinks;
+
+/**
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import junit.framework.TestCase;
+import org.apache.lucene.analysis.TeeTokenFilter;
+import org.apache.lucene.analysis.WhitespaceTokenizer;
+import org.apache.lucene.analysis.Token;
+
+import java.io.StringReader;
+import java.io.IOException;
+
+public class TokenRangeSinkTokenizerTest extends TestCase {
+
+
+ public TokenRangeSinkTokenizerTest(String s) {
+ super(s);
+ }
+
+ protected void setUp() {
+ }
+
+ protected void tearDown() {
+
+ }
+
+ public void test() throws IOException {
+ TokenRangeSinkTokenizer rangeToks = new TokenRangeSinkTokenizer(2, 4);
+ String test = "The quick red fox jumped over the lazy brown dogs";
+ TeeTokenFilter tee = new TeeTokenFilter(new WhitespaceTokenizer(new StringReader(test)), rangeToks);
+ Token tok = null;
+ int count = 0;
+ while ((tok = tee.next()) != null){
+ assertTrue("tok is null and it shouldn't be", tok != null);
+ count++;
+ }
+ assertTrue(count + " does not equal: " + 10, count == 10);
+ assertTrue("rangeToks Size: " + rangeToks.getTokens().size() + " is not: " + 2, rangeToks.getTokens().size() == 2);
+ }
+}
\ No newline at end of file
diff --git a/contrib/analyzers/src/test/org/apache/lucene/analysis/sinks/TokenTypeSinkTokenizerTest.java b/contrib/analyzers/src/test/org/apache/lucene/analysis/sinks/TokenTypeSinkTokenizerTest.java
new file mode 100644
index 00000000000..db57bb7fba8
--- /dev/null
+++ b/contrib/analyzers/src/test/org/apache/lucene/analysis/sinks/TokenTypeSinkTokenizerTest.java
@@ -0,0 +1,72 @@
+package org.apache.lucene.analysis.sinks;
+
+/**
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import junit.framework.TestCase;
+import org.apache.lucene.analysis.*;
+
+import java.io.IOException;
+import java.io.StringReader;
+
+public class TokenTypeSinkTokenizerTest extends TestCase {
+
+
+ public TokenTypeSinkTokenizerTest(String s) {
+ super(s);
+ }
+
+ protected void setUp() {
+ }
+
+ protected void tearDown() {
+
+ }
+
+ public void test() throws IOException {
+ TokenTypeSinkTokenizer sink = new TokenTypeSinkTokenizer("D");
+ String test = "The quick red fox jumped over the lazy brown dogs";
+
+ TeeTokenFilter ttf = new TeeTokenFilter(new WordTokenFilter(new WhitespaceTokenizer(new StringReader(test))), sink);
+ Token tok = new Token();
+ boolean seenDogs = false;
+ while ((tok = ttf.next(tok)) != null) {
+ if (tok.termText().equals("dogs")) {
+ seenDogs = true;
+ assertTrue(tok.type() + " is not equal to " + "D", tok.type().equals("D") == true);
+ } else {
+ assertTrue(tok.type() + " is not null and it should be", tok.type().equals("word"));
+ }
+ }
+ assertTrue(seenDogs + " does not equal: " + true, seenDogs == true);
+ assertTrue("sink Size: " + sink.getTokens().size() + " is not: " + 1, sink.getTokens().size() == 1);
+ }
+
+ private class WordTokenFilter extends TokenFilter {
+ private WordTokenFilter(TokenStream input) {
+ super(input);
+ }
+
+ public Token next(Token result) throws IOException {
+ result = input.next(result);
+ if (result != null && result.termText().equals("dogs")) {
+ result.setType("D");
+ }
+ return result;
+ }
+ }
+}
\ No newline at end of file