LUCENE-1077: refactored to have a common PayloadHelper class. Also added TokenOffsetPayloadTokenFilter, which encodes the Token offsets into the payload

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@604870 13f79535-47bb-0310-9956-ffa450edef68
Grant Ingersoll 2007-12-17 13:55:46 +00:00
parent b7e167ac8d
commit 55d0c3a2f8
5 changed files with 189 additions and 38 deletions
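For orientation, a minimal sketch of how the new TokenOffsetPayloadTokenFilter might be wired into indexing. The OffsetPayloadAnalyzer class is hypothetical; the tokenizer and filter are the real classes touched by this commit, and the 2.3-era Analyzer.tokenStream(String, Reader) API is assumed.

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.payloads.TokenOffsetPayloadTokenFilter;
import java.io.Reader;

// Hypothetical analyzer: each token's start/end offset is written into its payload at index time.
public class OffsetPayloadAnalyzer extends Analyzer {
  public TokenStream tokenStream(String fieldName, Reader reader) {
    return new TokenOffsetPayloadTokenFilter(new WhitespaceTokenizer(reader));
  }
}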

NumericPayloadTokenFilter.java (modified)

@@ -17,9 +17,9 @@ package org.apache.lucene.analysis.payloads;
*/
+import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.Token;
import org.apache.lucene.index.Payload;
import java.io.IOException;
@@ -37,44 +37,10 @@ public class NumericPayloadTokenFilter extends TokenFilter {
public NumericPayloadTokenFilter(TokenStream input, float payload, String typeMatch) {
super(input);
//Need to encode the payload
-thePayload = new Payload(encodePayload(payload));
+thePayload = new Payload(PayloadHelper.encodeFloat(payload));
this.typeMatch = typeMatch;
}
-public static byte[] encodePayload(float payload) {
-byte[] result = new byte[4];
-int tmp = Float.floatToIntBits(payload);
-result[0] = (byte)(tmp >> 24);
-result[1] = (byte)(tmp >> 16);
-result[2] = (byte)(tmp >> 8);
-result[3] = (byte) tmp;
-return result;
-}
-/**
-* @see #decodePayload(byte[], int)
-* @see #encodePayload(float)
-*/
-public static float decodePayload(byte [] bytes){
-return decodePayload(bytes, 0);
-}
-/**
-* Decode the payload that was encoded using {@link #encodePayload(float)}.
-* NOTE: the length of the array must be at least offset + 4 long.
-* @param bytes The bytes to decode
-* @param offset The offset into the array.
-* @return The float that was encoded
-*
-* @see #encodePayload(float)
-*/
-public static final float decodePayload(byte [] bytes, int offset){
-int tmp = ((bytes[offset] & 0xFF) << 24) | ((bytes[offset + 1] & 0xFF) << 16)
-| ((bytes[offset + 2] & 0xFF) << 8) | (bytes[offset + 3] & 0xFF);
-return Float.intBitsToFloat(tmp);
-}
public Token next(Token result) throws IOException {
result = input.next(result);
if (result != null && result.type().equals(typeMatch)){

PayloadHelper.java (new file)

@@ -0,0 +1,70 @@
package org.apache.lucene.analysis.payloads;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
 * Utility methods for encoding and decoding payload bytes: floats and ints are written
 * as 4 bytes each, high byte first, optionally at a given offset into an existing array.
 **/
public class PayloadHelper {
public static byte[] encodeFloat(float payload) {
return encodeFloat(payload, new byte[4], 0);
}
public static byte[] encodeFloat(float payload, byte[] data, int offset){
return encodeInt(Float.floatToIntBits(payload), data, offset);
}
public static byte[] encodeInt(int payload, byte[] data, int offset){
data[offset] = (byte)(payload >> 24);
data[offset + 1] = (byte)(payload >> 16);
data[offset + 2] = (byte)(payload >> 8);
data[offset + 3] = (byte) payload;
return data;
}
/**
* @param bytes The bytes containing an encoded float at position 0
* @see #decodeFloat(byte[], int)
* @see #encodeFloat(float)
* @return the decoded float
*/
public static float decodeFloat(byte [] bytes){
return decodeFloat(bytes, 0);
}
/**
* Decode the payload that was encoded using {@link #encodeFloat(float)}.
* NOTE: the length of the array must be at least offset + 4 long.
* @param bytes The bytes to decode
* @param offset The offset into the array.
* @return The float that was encoded
*
* @see #encodeFloat(float)
*/
public static final float decodeFloat(byte [] bytes, int offset){
return Float.intBitsToFloat(decodeInt(bytes, offset));
}
public static final int decodeInt(byte [] bytes, int offset){
return ((bytes[offset] & 0xFF) << 24) | ((bytes[offset + 1] & 0xFF) << 16)
| ((bytes[offset + 2] & 0xFF) << 8) | (bytes[offset + 3] & 0xFF);
}
}
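A quick usage sketch of the helper above; the demo class and the literal values are made up, while the PayloadHelper calls are the ones defined in this file.

import org.apache.lucene.analysis.payloads.PayloadHelper;

public class PayloadHelperDemo {
  public static void main(String[] args) {
    // Float round trip: 4 bytes, high byte first (IEEE 754 bits).
    byte[] boost = PayloadHelper.encodeFloat(3.0f);
    float f = PayloadHelper.decodeFloat(boost);     // 3.0f

    // Two ints packed into one 8-byte array via the offset parameter,
    // which is what TokenOffsetPayloadTokenFilter (below) does with token offsets.
    byte[] data = new byte[8];
    PayloadHelper.encodeInt(12, data, 0);
    PayloadHelper.encodeInt(17, data, 4);
    int start = PayloadHelper.decodeInt(data, 0);   // 12
    int end = PayloadHelper.decodeInt(data, 4);     // 17
    System.out.println(f + " " + start + "-" + end);
  }
}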

TokenOffsetPayloadTokenFilter.java (new file)

@@ -0,0 +1,52 @@
package org.apache.lucene.analysis.payloads;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.index.Payload;
import java.io.IOException;
/**
 * Stores the {@link org.apache.lucene.analysis.Token#startOffset() start offset} and
 * {@link org.apache.lucene.analysis.Token#endOffset() end offset} of each token in its payload:
 * the first 4 bytes are the start offset, the last 4 bytes are the end offset.
 **/
public class TokenOffsetPayloadTokenFilter extends TokenFilter {
public TokenOffsetPayloadTokenFilter(TokenStream input) {
super(input);
}
public Token next(Token result) throws IOException {
result = input.next(result);
if (result != null){
byte[] data = new byte[8];
PayloadHelper.encodeInt(result.startOffset(), data, 0);
PayloadHelper.encodeInt(result.endOffset(), data, 4);
Payload payload = new Payload(data);
result.setPayload(payload);
}
return result;
}
}
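On the consuming side, the stored offsets could be read back from the index roughly as follows. This is a sketch only: the OffsetPayloadReader class is hypothetical, and it assumes the TermPositions payload API of this era (getPayloadLength() and getPayload(byte[], int)).

import org.apache.lucene.analysis.payloads.PayloadHelper;
import org.apache.lucene.index.TermPositions;
import java.io.IOException;

// Hypothetical helper: decodes the 8-byte payloads written by TokenOffsetPayloadTokenFilter
// from a TermPositions that is already positioned on a matching document.
public class OffsetPayloadReader {
  public static int[] readOffsets(TermPositions tp) throws IOException {
    tp.nextPosition();                               // advance to the next occurrence
    if (tp.getPayloadLength() != 8) {
      return null;                                   // no offset payload stored for this position
    }
    byte[] data = tp.getPayload(new byte[8], 0);
    int start = PayloadHelper.decodeInt(data, 0);    // first 4 bytes: start offset
    int end = PayloadHelper.decodeInt(data, 4);      // last 4 bytes: end offset
    return new int[] { start, end };
  }
}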

NumericPayloadTokenFilterTest.java (modified)

@@ -17,9 +17,9 @@ package org.apache.lucene.analysis.payloads;
*/
import junit.framework.TestCase;
+import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.WhitespaceTokenizer;
import java.io.IOException;
@@ -53,7 +53,7 @@ public class NumericPayloadTokenFilterTest extends TestCase {
byte [] bytes = tok.getPayload().getData();//safe here to just use the bytes, otherwise we should use offset, length
assertTrue(bytes.length + " does not equal: " + tok.getPayload().length(), bytes.length == tok.getPayload().length());
assertTrue(tok.getPayload().getOffset() + " does not equal: " + 0, tok.getPayload().getOffset() == 0);
-float pay = NumericPayloadTokenFilter.decodePayload(bytes);
+float pay = PayloadHelper.decodeFloat(bytes);
assertTrue(pay + " does not equal: " + 3, pay == 3);
} else {
assertTrue(tok.type() + " is not null and it should be", tok.type().equals("word"));

TokenOffsetPayloadTokenFilterTest.java (new file)

@@ -0,0 +1,63 @@
package org.apache.lucene.analysis.payloads;
/**
* Copyright 2004 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import junit.framework.TestCase;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.index.Payload;
import java.io.IOException;
import java.io.StringReader;
public class TokenOffsetPayloadTokenFilterTest extends TestCase {
public TokenOffsetPayloadTokenFilterTest(String s) {
super(s);
}
protected void setUp() {
}
protected void tearDown() {
}
public void test() throws IOException {
String test = "The quick red fox jumped over the lazy brown dogs";
TokenOffsetPayloadTokenFilter nptf = new TokenOffsetPayloadTokenFilter(new WhitespaceTokenizer(new StringReader(test)));
Token tok = new Token();
int count = 0;
while ((tok = nptf.next(tok)) != null){
assertTrue("tok is null and it shouldn't be", tok != null);
Payload pay = tok.getPayload();
assertTrue("pay is null and it shouldn't be", pay != null);
byte [] data = pay.getData();
int start = PayloadHelper.decodeInt(data, 0);
assertTrue(start + " does not equal: " + tok.startOffset(), start == tok.startOffset());
int end = PayloadHelper.decodeInt(data, 4);
assertTrue(end + " does not equal: " + tok.endOffset(), end == tok.endOffset());
count++;
}
assertTrue(count + " does not equal: " + 10, count == 10);
}
}