LUCENE-1077: refactored to have a common PayloadHelper classes. Also added TokenOffsetPayloadTokenFilter, which encodes the Token offset into the payloads

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@604870 13f79535-47bb-0310-9956-ffa450edef68
2007-12-17 13:55:46 +00:00 · 2007-12-17 13:55:46 +00:00 · 55d0c3a2f8
parent b7e167ac8d
commit 55d0c3a2f8
5 changed files with 189 additions and 38 deletions
--- a/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilter.java
+++ b/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilter.java
@ -17,9 +17,9 @@ package org.apache.lucene.analysis.payloads;
 */
 import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Token;
 import org.apache.lucene.index.Payload;
 import java.io.IOException;
@ -37,44 +37,10 @@ public class NumericPayloadTokenFilter extends TokenFilter {
  public NumericPayloadTokenFilter(TokenStream input, float payload, String typeMatch) {
    super(input);
    //Need to encode the payload
-    thePayload = new Payload(encodePayload(payload));
+    thePayload = new Payload(PayloadHelper.encodeFloat(payload));
    this.typeMatch = typeMatch;
  }
  public static byte[] encodePayload(float payload) {
    byte[] result = new byte[4];
    int tmp = Float.floatToIntBits(payload);
    result[0] = (byte)(tmp >> 24);
    result[1] = (byte)(tmp >> 16);
    result[2] = (byte)(tmp >>  8);
    result[3] = (byte) tmp;
    return result;
  }
  /**
   * @see #decodePayload(byte[], int)
   * @see #encodePayload(float)
   */
  public static float decodePayload(byte [] bytes){
    return decodePayload(bytes, 0);
  }
  /**
   * Decode the payload that was encoded using {@link #encodePayload(float)}.
   * NOTE: the length of the array must be at least offset + 4 long.
   * @param bytes The bytes to decode
   * @param offset The offset into the array.
   * @return The float that was encoded
   *
   * @see #encodePayload(float) 
   */
  public static final float decodePayload(byte [] bytes, int offset){
    int tmp = ((bytes[offset] & 0xFF) << 24) | ((bytes[offset + 1] & 0xFF) << 16)
         | ((bytes[offset + 2] & 0xFF) <<  8) |  (bytes[offset + 3] & 0xFF);
    return Float.intBitsToFloat(tmp);
  }
  public Token next(Token result) throws IOException {
    result = input.next(result);
    if (result != null && result.type().equals(typeMatch)){
--- a/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/PayloadHelper.java
+++ b/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/PayloadHelper.java
@ -0,0 +1,70 @@
 package org.apache.lucene.analysis.payloads;
 /**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 /**
 *
 *
 **/
 public class PayloadHelper {
  public static byte[] encodeFloat(float payload) {
    return encodeFloat(payload, new byte[4], 0);
  }
  public static byte[] encodeFloat(float payload, byte[] data, int offset){
    return encodeInt(Float.floatToIntBits(payload), data, offset);
  }
  public static byte[] encodeInt(int payload, byte[] data, int offset){
    data[offset] = (byte)(payload >> 24);
    data[offset + 1] = (byte)(payload >> 16);
    data[offset + 2] = (byte)(payload >>  8);
    data[offset + 3] = (byte) payload;
    return data;
  }
  /**
   * @param bytes
   * @see #decodeFloat(byte[], int)
   * @see #encodeFloat(float)
   * @return the decoded float
   */
  public static float decodeFloat(byte [] bytes){
    return decodeFloat(bytes, 0);
  }
  /**
   * Decode the payload that was encoded using {@link #encodeFloat(float)}.
   * NOTE: the length of the array must be at least offset + 4 long.
   * @param bytes The bytes to decode
   * @param offset The offset into the array.
   * @return The float that was encoded
   *
   * @see # encodeFloat (float)
   */
  public static final float decodeFloat(byte [] bytes, int offset){
    return Float.intBitsToFloat(decodeInt(bytes, offset));
  }
  public static final int decodeInt(byte [] bytes, int offset){
    return ((bytes[offset] & 0xFF) << 24) | ((bytes[offset + 1] & 0xFF) << 16)
         | ((bytes[offset + 2] & 0xFF) <<  8) |  (bytes[offset + 3] & 0xFF);
  }
 }
--- a/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/TokenOffsetPayloadTokenFilter.java
+++ b/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/TokenOffsetPayloadTokenFilter.java
@ -0,0 +1,52 @@
 package org.apache.lucene.analysis.payloads;
 /**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.index.Payload;
 import java.io.IOException;
 /**
 * Adds the {@link org.apache.lucene.analysis.Token#setStartOffset(int)}
 * and {@link org.apache.lucene.analysis.Token#setEndOffset(int)}
 * First 4 bytes are the start
 *
 **/
 public class TokenOffsetPayloadTokenFilter extends TokenFilter {
  public TokenOffsetPayloadTokenFilter(TokenStream input) {
    super(input);
  }
  public Token next(Token result) throws IOException {
    result = input.next(result);
    if (result != null){
      byte[] data = new byte[8];
      PayloadHelper.encodeInt(result.startOffset(), data, 0);
      PayloadHelper.encodeInt(result.endOffset(), data, 4);
      Payload payload = new Payload(data);
      result.setPayload(payload);
    }
    return result;
  }
 }
--- a/contrib/analyzers/src/test/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilterTest.java
+++ b/contrib/analyzers/src/test/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilterTest.java
@ -17,9 +17,9 @@ package org.apache.lucene.analysis.payloads;
 */
 import junit.framework.TestCase;
 import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.WhitespaceTokenizer;
 import java.io.IOException;
@ -53,7 +53,7 @@ public class NumericPayloadTokenFilterTest extends TestCase {
        byte [] bytes = tok.getPayload().getData();//safe here to just use the bytes, otherwise we should use offset, length
        assertTrue(bytes.length + " does not equal: " + tok.getPayload().length(), bytes.length == tok.getPayload().length());
        assertTrue(tok.getPayload().getOffset() + " does not equal: " + 0, tok.getPayload().getOffset() == 0);
-        float pay = NumericPayloadTokenFilter.decodePayload(bytes);
+        float pay = PayloadHelper.decodeFloat(bytes);
        assertTrue(pay + " does not equal: " + 3, pay == 3);
      } else {
        assertTrue(tok.type() + " is not null and it should be", tok.type().equals("word"));
--- a/contrib/analyzers/src/test/org/apache/lucene/analysis/payloads/TokenOffsetPayloadTokenFilterTest.java
+++ b/contrib/analyzers/src/test/org/apache/lucene/analysis/payloads/TokenOffsetPayloadTokenFilterTest.java
@ -0,0 +1,63 @@
 package org.apache.lucene.analysis.payloads;
 /**
 * Copyright 2004 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 import junit.framework.TestCase;
 import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.WhitespaceTokenizer;
 import org.apache.lucene.index.Payload;
 import java.io.IOException;
 import java.io.StringReader;
 public class TokenOffsetPayloadTokenFilterTest extends TestCase {
  public TokenOffsetPayloadTokenFilterTest(String s) {
    super(s);
  }
  protected void setUp() {
  }
  protected void tearDown() {
  }
  public void test() throws IOException {
    String test = "The quick red fox jumped over the lazy brown dogs";
    TokenOffsetPayloadTokenFilter nptf = new TokenOffsetPayloadTokenFilter(new WhitespaceTokenizer(new StringReader(test)));
    Token tok = new Token();
    int count = 0;
    while ((tok = nptf.next(tok)) != null){
      assertTrue("tok is null and it shouldn't be", tok != null);
      Payload pay = tok.getPayload();
      assertTrue("pay is null and it shouldn't be", pay != null);
      byte [] data = pay.getData();
      int start = PayloadHelper.decodeInt(data, 0);
      assertTrue(start + " does not equal: " + tok.startOffset(), start == tok.startOffset());
      int end = PayloadHelper.decodeInt(data, 4);
      assertTrue(end + " does not equal: " + tok.endOffset(), end == tok.endOffset());
      count++;
    }
    assertTrue(count + " does not equal: " + 10, count == 10);
  }
 }