LUCENE-1077 new sinks and payloads analysis packages

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@602081 13f79535-47bb-0310-9956-ffa450edef68
2007-12-07 12:21:49 +00:00 · 2007-12-07 12:21:49 +00:00 · f9b2e971f2
parent ce9b80539d
commit f9b2e971f2
12 changed files with 732 additions and 0 deletions
--- a/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilter.java
+++ b/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilter.java
@ -0,0 +1,85 @@
+package org.apache.lucene.analysis.payloads;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.index.Payload;
+
+import java.io.IOException;
+
+
+/**
+ * Assigns a payload to a token based on the {@link org.apache.lucene.analysis.Token#type()}
+ *
+ **/
+public class NumericPayloadTokenFilter extends TokenFilter {
+
+  private String typeMatch;
+  private Payload thePayload;
+
+  public NumericPayloadTokenFilter(TokenStream input, float payload, String typeMatch) {
+    super(input);
+    //Need to encode the payload
+    thePayload = new Payload(encodePayload(payload));
+    this.typeMatch = typeMatch;
+  }
+
+  public static byte[] encodePayload(float payload) {
+    byte[] result = new byte[4];
+    int tmp = Float.floatToIntBits(payload);
+    result[0] = (byte)(tmp >> 24);
+    result[1] = (byte)(tmp >> 16);
+    result[2] = (byte)(tmp >>  8);
+    result[3] = (byte) tmp;
+
+    return result;
+  }
+
+  /**
+   * @see #decodePayload(byte[], int)
+   * @see #encodePayload(float)
+   */
+  public static float decodePayload(byte [] bytes){
+    return decodePayload(bytes, 0);
+  }
+
+  /**
+   * Decode the payload that was encoded using {@link #encodePayload(float)}.
+   * NOTE: the length of the array must be at least offset + 4 long.
+   * @param bytes The bytes to decode
+   * @param offset The offset into the array.
+   * @return The float that was encoded
+   *
+   * @see #encodePayload(float) 
+   */
+  public static final float decodePayload(byte [] bytes, int offset){
+    int tmp = ((bytes[offset] & 0xFF) << 24) | ((bytes[offset + 1] & 0xFF) << 16)
+         | ((bytes[offset + 2] & 0xFF) <<  8) |  (bytes[offset + 3] & 0xFF);
+    return Float.intBitsToFloat(tmp);
+  }
+
+  public Token next(Token result) throws IOException {
+    result = input.next(result);
+    if (result != null && result.type().equals(typeMatch)){
+      result.setPayload(thePayload);
+    }
+    return result;
+  }
+}
--- a/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilter.java
+++ b/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilter.java
@ -0,0 +1,49 @@
+package org.apache.lucene.analysis.payloads;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.index.Payload;
+
+import java.io.IOException;
+
+
+/**
+ * Makes the {@link org.apache.lucene.analysis.Token#type()} a payload.
+ *
+ * Encodes the type using {@link String#getBytes(String)} with "UTF-8" as the encoding
+ *
+ **/
+public class TypeAsPayloadTokenFilter extends TokenFilter {
+
+  public TypeAsPayloadTokenFilter(TokenStream input) {
+    super(input);
+
+  }
+
+
+  public Token next(Token result) throws IOException {
+    result = input.next(result);
+    if (result != null && result.type() != null && result.type().equals("") == false){
+      result.setPayload(new Payload(result.type().getBytes("UTF-8")));
+    }
+    return result;
+  }
+}
--- a/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/package.html
+++ b/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/package.html
@ -0,0 +1,31 @@
+<HTML>
+ <!--
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ --><HEAD>
+    <TITLE>org.apache.lucene.analysis.payloads</TITLE>
+</HEAD>
+<BODY>
+<DIV>Provides various convenience classes for creating payloads on Tokens.
+</DIV>
+<DIV>&nbsp;</DIV>
+<DIV align="center">
+Copyright &copy; 2007 <A HREF="http://www.apache.org">Apache Software Foundation</A>
+</DIV>
+</BODY>
+</HTML>
--- a/contrib/analyzers/src/java/org/apache/lucene/analysis/sinks/DateRecognizerSinkTokenizer.java
+++ b/contrib/analyzers/src/java/org/apache/lucene/analysis/sinks/DateRecognizerSinkTokenizer.java
@ -0,0 +1,87 @@
+package org.apache.lucene.analysis.sinks;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.SinkTokenizer;
+import org.apache.lucene.analysis.Token;
+
+import java.text.DateFormat;
+import java.text.SimpleDateFormat;
+import java.text.ParseException;
+import java.util.List;
+import java.util.Date;
+
+
+/**
+ * Attempts to parse the {@link org.apache.lucene.analysis.Token#termBuffer()} as a Date using a {@link java.text.DateFormat}.
+ * If the value is a Date, it will add it to the sink.
+ * <p/>
+ * Also marks the sink token with {@link org.apache.lucene.analysis.Token#type()} equal to {@link #DATE_TYPE}
+ *
+ *
+ **/
+public class DateRecognizerSinkTokenizer extends SinkTokenizer {
+  public static final String DATE_TYPE = "date";
+
+  protected DateFormat dateFormat;
+
+  /**
+   * Uses {@link java.text.SimpleDateFormat#getDateInstance()} as the {@link java.text.DateFormat} object.
+   */
+  public DateRecognizerSinkTokenizer() {
+    this(null, SimpleDateFormat.getDateInstance());
+  }
+
+  public DateRecognizerSinkTokenizer(DateFormat dateFormat) {
+    this(null, dateFormat);
+  }
+
+  /**
+   * Uses {@link java.text.SimpleDateFormat#getDateInstance()} as the {@link java.text.DateFormat} object.
+   * @param input The input list of Tokens that are already Dates.  They should be marked as type {@link #DATE_TYPE} for completeness
+   */
+  public DateRecognizerSinkTokenizer(List/*<Token>*/ input) {
+    this(input, SimpleDateFormat.getDateInstance());
+  }
+
+  /**
+   *
+   * @param input
+   * @param dateFormat The date format to use to try and parse the date.  Note, this SinkTokenizer makes no attempt to synchronize the DateFormat object
+   */
+  public DateRecognizerSinkTokenizer(List/*<Token>*/ input, DateFormat dateFormat) {
+    super(input);
+    this.dateFormat = dateFormat;
+  }
+
+
+  public void add(Token t) {
+    //Check to see if this token is a date
+    if (t != null) {
+      try {
+        Date date = dateFormat.parse(new String(t.termBuffer(), 0, t.termLength()));//We don't care about the date, just that we can parse it as a date
+        if (date != null) {
+          t.setType(DATE_TYPE);
+          lst.add(t.clone());
+        }
+      } catch (ParseException e) {
+
+      }
+    }
+
+  }
+}
--- a/contrib/analyzers/src/java/org/apache/lucene/analysis/sinks/TokenRangeSinkTokenizer.java
+++ b/contrib/analyzers/src/java/org/apache/lucene/analysis/sinks/TokenRangeSinkTokenizer.java
@ -0,0 +1,55 @@
+package org.apache.lucene.analysis.sinks;
+
+import org.apache.lucene.analysis.SinkTokenizer;
+import org.apache.lucene.analysis.Token;
+
+import java.io.IOException;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+/**
+ * Counts the tokens as they go by and saves to the internal list those between the range of lower and upper, exclusive of upper
+ *
+ **/
+public class TokenRangeSinkTokenizer extends SinkTokenizer {
+  private int lower;
+  private int upper;
+  private int count;
+
+  public TokenRangeSinkTokenizer(int lower, int upper) {
+    this.lower = lower;
+    this.upper = upper;
+  }
+
+  public TokenRangeSinkTokenizer(int initCap, int lower, int upper) {
+    super(initCap);
+    this.lower = lower;
+    this.upper = upper;
+  }
+
+  public void add(Token t) {
+    if (count >= lower && count < upper){
+      super.add(t);
+    }
+    count++;
+  }
+
+  public void reset() throws IOException {
+    count = 0;
+  }
+}
--- a/contrib/analyzers/src/java/org/apache/lucene/analysis/sinks/TokenTypeSinkTokenizer.java
+++ b/contrib/analyzers/src/java/org/apache/lucene/analysis/sinks/TokenTypeSinkTokenizer.java
@ -0,0 +1,54 @@
+package org.apache.lucene.analysis.sinks;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.SinkTokenizer;
+import org.apache.lucene.analysis.Token;
+
+import java.util.List;
+
+
+/**
+ * If the {@link org.apache.lucene.analysis.Token#type()} matches the passed in <code>typeToMatch</code> then
+ * add it to the sink
+ *
+ **/
+public class TokenTypeSinkTokenizer extends SinkTokenizer {
+
+  private String typeToMatch;
+
+  public TokenTypeSinkTokenizer(String typeToMatch) {
+    this.typeToMatch = typeToMatch;
+  }
+
+  public TokenTypeSinkTokenizer(int initCap, String typeToMatch) {
+    super(initCap);
+    this.typeToMatch = typeToMatch;
+  }
+
+  public TokenTypeSinkTokenizer(List/*<Token>*/ input, String typeToMatch) {
+    super(input);
+    this.typeToMatch = typeToMatch;
+  }
+
+  public void add(Token t) {
+    //check to see if this is a Category
+    if (t != null && typeToMatch.equals(t.type())){
+      lst.add(t.clone());
+    }
+  }
+}
--- a/contrib/analyzers/src/java/org/apache/lucene/analysis/sinks/package.html
+++ b/contrib/analyzers/src/java/org/apache/lucene/analysis/sinks/package.html
@ -0,0 +1,30 @@
+<HTML>
+ <!--
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+ --><HEAD>
+    <TITLE>org.apache.lucene.analysis.sinks</TITLE>
+</HEAD>
+<BODY>
+<DIV>Implementations of the SinkTokenizer that might be useful.
+</DIV>
+<DIV>&nbsp;</DIV>
+<DIV align="center">
+Copyright &copy; 2007 <A HREF="http://www.apache.org">Apache Software Foundation</A>
+</DIV>
+</BODY>
+</HTML>
--- a/contrib/analyzers/src/test/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilterTest.java
+++ b/contrib/analyzers/src/test/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilterTest.java
@ -0,0 +1,79 @@
+package org.apache.lucene.analysis.payloads;
+
+/**
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import junit.framework.TestCase;
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.WhitespaceTokenizer;
+
+import java.io.IOException;
+import java.io.StringReader;
+
+public class NumericPayloadTokenFilterTest extends TestCase {
+
+
+  public NumericPayloadTokenFilterTest(String s) {
+    super(s);
+  }
+
+  protected void setUp() {
+  }
+
+  protected void tearDown() {
+
+  }
+
+  public void test() throws IOException {
+    String test = "The quick red fox jumped over the lazy brown dogs";
+
+    NumericPayloadTokenFilter nptf = new NumericPayloadTokenFilter(new WordTokenFilter(new WhitespaceTokenizer(new StringReader(test))), 3, "D");
+    Token tok = new Token();
+    boolean seenDogs = false;
+    while ((tok = nptf.next(tok)) != null){
+      if (tok.termText().equals("dogs")){
+        seenDogs = true;
+        assertTrue(tok.type() + " is not equal to " + "D", tok.type().equals("D") == true);
+        assertTrue("tok.getPayload() is null and it shouldn't be", tok.getPayload() != null);
+        byte [] bytes = tok.getPayload().getData();//safe here to just use the bytes, otherwise we should use offset, length
+        assertTrue(bytes.length + " does not equal: " + tok.getPayload().length(), bytes.length == tok.getPayload().length());
+        assertTrue(tok.getPayload().getOffset() + " does not equal: " + 0, tok.getPayload().getOffset() == 0);
+        float pay = NumericPayloadTokenFilter.decodePayload(bytes);
+        assertTrue(pay + " does not equal: " + 3, pay == 3);
+      } else {
+        assertTrue(tok.type() + " is not null and it should be", tok.type().equals("word"));
+      }
+    }
+    assertTrue(seenDogs + " does not equal: " + true, seenDogs == true);
+  }
+
+  private class WordTokenFilter extends TokenFilter {
+    private WordTokenFilter(TokenStream input) {
+      super(input);
+    }
+
+    public Token next(Token result) throws IOException {
+      result = input.next(result);
+      if (result != null && result.termText().equals("dogs")) {
+        result.setType("D");
+      }
+      return result;
+    }
+  }
+
+}
--- a/contrib/analyzers/src/test/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilterTest.java
+++ b/contrib/analyzers/src/test/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilterTest.java
@ -0,0 +1,76 @@
+package org.apache.lucene.analysis.payloads;
+
+/**
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import junit.framework.TestCase;
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.WhitespaceTokenizer;
+
+import java.io.IOException;
+import java.io.StringReader;
+
+public class TypeAsPayloadTokenFilterTest extends TestCase {
+
+
+  public TypeAsPayloadTokenFilterTest(String s) {
+    super(s);
+  }
+
+  protected void setUp() {
+  }
+
+  protected void tearDown() {
+
+  }
+
+
+  public void test() throws IOException {
+    String test = "The quick red fox jumped over the lazy brown dogs";
+
+    TypeAsPayloadTokenFilter nptf = new TypeAsPayloadTokenFilter(new WordTokenFilter(new WhitespaceTokenizer(new StringReader(test))));
+    Token tok = new Token();
+    int count = 0;
+    while ((tok = nptf.next(tok)) != null){
+      assertTrue(tok.type() + " is not null and it should be", tok.type().equals(String.valueOf(Character.toUpperCase(tok.termBuffer()[0]))));
+      assertTrue("tok.getPayload() is null and it shouldn't be", tok.getPayload() != null);
+      String type = new String(tok.getPayload().getData(), "UTF-8");
+      assertTrue("type is null and it shouldn't be", type != null);
+      assertTrue(type + " is not equal to " + tok.type(), type.equals(tok.type()) == true);
+      count++;
+    }
+    assertTrue(count + " does not equal: " + 10, count == 10);
+  }
+
+  private class WordTokenFilter extends TokenFilter {
+    private WordTokenFilter(TokenStream input) {
+      super(input);
+    }
+
+
+
+    public Token next(Token result) throws IOException {
+      result = input.next(result);
+      if (result != null) {
+        result.setType(String.valueOf(Character.toUpperCase(result.termBuffer()[0])));
+      }
+      return result;
+    }
+  }
+
+}
--- a/contrib/analyzers/src/test/org/apache/lucene/analysis/sinks/DateRecognizerSinkTokenizerTest.java
+++ b/contrib/analyzers/src/test/org/apache/lucene/analysis/sinks/DateRecognizerSinkTokenizerTest.java
@ -0,0 +1,60 @@
+package org.apache.lucene.analysis.sinks;
+
+/**
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import junit.framework.TestCase;
+import org.apache.lucene.analysis.TeeTokenFilter;
+import org.apache.lucene.analysis.WhitespaceTokenizer;
+import org.apache.lucene.analysis.Token;
+
+import java.io.StringReader;
+import java.io.IOException;
+import java.text.SimpleDateFormat;
+
+public class DateRecognizerSinkTokenizerTest extends TestCase {
+
+
+  public DateRecognizerSinkTokenizerTest(String s) {
+    super(s);
+  }
+
+  protected void setUp() {
+  }
+
+  protected void tearDown() {
+
+  }
+
+  public void test() throws IOException {
+    DateRecognizerSinkTokenizer sink = new DateRecognizerSinkTokenizer(new SimpleDateFormat("MM/dd/yyyy"));
+    String test = "The quick red fox jumped over the lazy brown dogs on 7/11/2006  The dogs finally reacted on 7/12/2006";
+    TeeTokenFilter tee = new TeeTokenFilter(new WhitespaceTokenizer(new StringReader(test)), sink);
+    Token tok = null;
+    int count = 0;
+    while ((tok = tee.next()) != null){
+      assertTrue("tok is null and it shouldn't be", tok != null);
+      if (tok.termBuffer()[0] == '7'){
+        assertTrue(tok.type() + " is not equal to " + DateRecognizerSinkTokenizer.DATE_TYPE,
+                tok.type().equals(DateRecognizerSinkTokenizer.DATE_TYPE) == true);
+      }
+      count++;
+    }
+    assertTrue(count + " does not equal: " + 18, count == 18);
+    assertTrue("sink Size: " + sink.getTokens().size() + " is not: " + 2, sink.getTokens().size() == 2);
+
+  }
+}
--- a/contrib/analyzers/src/test/org/apache/lucene/analysis/sinks/TokenRangeSinkTokenizerTest.java
+++ b/contrib/analyzers/src/test/org/apache/lucene/analysis/sinks/TokenRangeSinkTokenizerTest.java
@ -0,0 +1,54 @@
+package org.apache.lucene.analysis.sinks;
+
+/**
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import junit.framework.TestCase;
+import org.apache.lucene.analysis.TeeTokenFilter;
+import org.apache.lucene.analysis.WhitespaceTokenizer;
+import org.apache.lucene.analysis.Token;
+
+import java.io.StringReader;
+import java.io.IOException;
+
+public class TokenRangeSinkTokenizerTest extends TestCase {
+
+
+  public TokenRangeSinkTokenizerTest(String s) {
+    super(s);
+  }
+
+  protected void setUp() {
+  }
+
+  protected void tearDown() {
+
+  }
+
+  public void test() throws IOException {
+    TokenRangeSinkTokenizer rangeToks = new TokenRangeSinkTokenizer(2, 4);
+    String test = "The quick red fox jumped over the lazy brown dogs";
+    TeeTokenFilter tee = new TeeTokenFilter(new WhitespaceTokenizer(new StringReader(test)), rangeToks);
+    Token tok = null;
+    int count = 0;
+    while ((tok = tee.next()) != null){
+      assertTrue("tok is null and it shouldn't be", tok != null);
+      count++;
+    }
+    assertTrue(count + " does not equal: " + 10, count == 10);
+    assertTrue("rangeToks Size: " + rangeToks.getTokens().size() + " is not: " + 2, rangeToks.getTokens().size() == 2);
+  }
+}
--- a/contrib/analyzers/src/test/org/apache/lucene/analysis/sinks/TokenTypeSinkTokenizerTest.java
+++ b/contrib/analyzers/src/test/org/apache/lucene/analysis/sinks/TokenTypeSinkTokenizerTest.java
@ -0,0 +1,72 @@
+package org.apache.lucene.analysis.sinks;
+
+/**
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import junit.framework.TestCase;
+import org.apache.lucene.analysis.*;
+import org.apache.lucene.analysis.payloads.NumericPayloadTokenFilter;
+
+import java.io.IOException;
+import java.io.StringReader;
+
+public class TokenTypeSinkTokenizerTest extends TestCase {
+
+
+  public TokenTypeSinkTokenizerTest(String s) {
+    super(s);
+  }
+
+  protected void setUp() {
+  }
+
+  protected void tearDown() {
+
+  }
+
+  public void test() throws IOException {
+    TokenTypeSinkTokenizer sink = new TokenTypeSinkTokenizer("D");
+    String test = "The quick red fox jumped over the lazy brown dogs";
+
+    TeeTokenFilter ttf = new TeeTokenFilter(new WordTokenFilter(new WhitespaceTokenizer(new StringReader(test))), sink);
+    Token tok = new Token();
+    boolean seenDogs = false;
+    while ((tok = ttf.next(tok)) != null) {
+      if (tok.termText().equals("dogs")) {
+        seenDogs = true;
+        assertTrue(tok.type() + " is not equal to " + "D", tok.type().equals("D") == true);
+      } else {
+        assertTrue(tok.type() + " is not null and it should be", tok.type().equals("word"));
+      }
+    }
+    assertTrue(seenDogs + " does not equal: " + true, seenDogs == true);
+    assertTrue("sink Size: " + sink.getTokens().size() + " is not: " + 1, sink.getTokens().size() == 1);
+  }
+
+  private class WordTokenFilter extends TokenFilter {
+    private WordTokenFilter(TokenStream input) {
+      super(input);
+    }
+
+    public Token next(Token result) throws IOException {
+      result = input.next(result);
+      if (result != null && result.termText().equals("dogs")) {
+        result.setType("D");
+      }
+      return result;
+    }
+  }
+}