mirror of https://github.com/apache/lucene.git
LUCENE-1077: Refactored to use a common PayloadHelper class. Also added TokenOffsetPayloadTokenFilter, which encodes each Token's start and end offsets into its payload.
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@604870 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
b7e167ac8d
commit
55d0c3a2f8
|
@ -17,9 +17,9 @@ package org.apache.lucene.analysis.payloads;
|
|||
*/
|
||||
|
||||
|
||||
import org.apache.lucene.analysis.Token;
|
||||
import org.apache.lucene.analysis.TokenFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.Token;
|
||||
import org.apache.lucene.index.Payload;
|
||||
|
||||
import java.io.IOException;
|
||||
|
@ -37,44 +37,10 @@ public class NumericPayloadTokenFilter extends TokenFilter {
|
|||
public NumericPayloadTokenFilter(TokenStream input, float payload, String typeMatch) {
|
||||
super(input);
|
||||
//Need to encode the payload
|
||||
thePayload = new Payload(encodePayload(payload));
|
||||
thePayload = new Payload(PayloadHelper.encodeFloat(payload));
|
||||
this.typeMatch = typeMatch;
|
||||
}
|
||||
|
||||
public static byte[] encodePayload(float payload) {
|
||||
byte[] result = new byte[4];
|
||||
int tmp = Float.floatToIntBits(payload);
|
||||
result[0] = (byte)(tmp >> 24);
|
||||
result[1] = (byte)(tmp >> 16);
|
||||
result[2] = (byte)(tmp >> 8);
|
||||
result[3] = (byte) tmp;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* @see #decodePayload(byte[], int)
|
||||
* @see #encodePayload(float)
|
||||
*/
|
||||
public static float decodePayload(byte [] bytes){
|
||||
return decodePayload(bytes, 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Decode the payload that was encoded using {@link #encodePayload(float)}.
|
||||
* NOTE: the length of the array must be at least offset + 4 long.
|
||||
* @param bytes The bytes to decode
|
||||
* @param offset The offset into the array.
|
||||
* @return The float that was encoded
|
||||
*
|
||||
* @see #encodePayload(float)
|
||||
*/
|
||||
public static final float decodePayload(byte [] bytes, int offset){
|
||||
int tmp = ((bytes[offset] & 0xFF) << 24) | ((bytes[offset + 1] & 0xFF) << 16)
|
||||
| ((bytes[offset + 2] & 0xFF) << 8) | (bytes[offset + 3] & 0xFF);
|
||||
return Float.intBitsToFloat(tmp);
|
||||
}
|
||||
|
||||
public Token next(Token result) throws IOException {
|
||||
result = input.next(result);
|
||||
if (result != null && result.type().equals(typeMatch)){
|
||||
|
|
|
@ -0,0 +1,70 @@
|
|||
package org.apache.lucene.analysis.payloads;
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
|
||||
/**
 * Static helpers for converting <code>int</code> and <code>float</code> values to and
 * from the 4-byte form stored in token payloads.
 * <p>
 * Values are written most significant byte first. Floats are stored as the bit
 * pattern returned by {@link Float#floatToIntBits(float)}.
 **/
public class PayloadHelper {

  /**
   * Encodes a float into a newly allocated 4-byte array.
   *
   * @param payload The value to encode
   * @return A new 4-byte array holding the encoded value
   *
   * @see #encodeFloat(float, byte[], int)
   * @see #decodeFloat(byte[])
   */
  public static byte[] encodeFloat(float payload) {
    return encodeFloat(payload, new byte[4], 0);
  }

  /**
   * Encodes a float into an existing array.
   * NOTE: the length of the array must be at least offset + 4 long.
   *
   * @param payload The value to encode
   * @param data The array to write into
   * @param offset The index in <code>data</code> of the first byte to write
   * @return <code>data</code>, the same array that was passed in
   *
   * @see #decodeFloat(byte[], int)
   */
  public static byte[] encodeFloat(float payload, byte[] data, int offset) {
    return encodeInt(Float.floatToIntBits(payload), data, offset);
  }

  /**
   * Encodes an int into a newly allocated 4-byte array.
   *
   * @param payload The value to encode
   * @return A new 4-byte array holding the encoded value
   *
   * @see #encodeInt(int, byte[], int)
   * @see #decodeInt(byte[], int)
   */
  public static byte[] encodeInt(int payload) {
    return encodeInt(payload, new byte[4], 0);
  }

  /**
   * Encodes an int into an existing array, most significant byte first.
   * NOTE: the length of the array must be at least offset + 4 long.
   *
   * @param payload The value to encode
   * @param data The array to write into
   * @param offset The index in <code>data</code> of the first byte to write
   * @return <code>data</code>, the same array that was passed in
   *
   * @see #decodeInt(byte[], int)
   */
  public static byte[] encodeInt(int payload, byte[] data, int offset) {
    data[offset] = (byte) (payload >> 24);
    data[offset + 1] = (byte) (payload >> 16);
    data[offset + 2] = (byte) (payload >> 8);
    data[offset + 3] = (byte) payload;
    return data;
  }

  /**
   * Decodes a float from the first 4 bytes of the array.
   *
   * @param bytes The bytes to decode
   * @return The decoded float
   *
   * @see #decodeFloat(byte[], int)
   * @see #encodeFloat(float)
   */
  public static float decodeFloat(byte[] bytes) {
    return decodeFloat(bytes, 0);
  }

  /**
   * Decode the payload that was encoded using {@link #encodeFloat(float)}.
   * NOTE: the length of the array must be at least offset + 4 long.
   *
   * @param bytes The bytes to decode
   * @param offset The offset into the array.
   * @return The float that was encoded
   *
   * @see #encodeFloat(float)
   */
  public static final float decodeFloat(byte[] bytes, int offset) {
    return Float.intBitsToFloat(decodeInt(bytes, offset));
  }

  /**
   * Decode the payload that was encoded using {@link #encodeInt(int, byte[], int)}.
   * NOTE: the length of the array must be at least offset + 4 long.
   *
   * @param bytes The bytes to decode
   * @param offset The offset into the array.
   * @return The int that was encoded
   *
   * @see #encodeInt(int, byte[], int)
   */
  public static final int decodeInt(byte[] bytes, int offset) {
    // Mask each byte with 0xFF so that sign extension of negative bytes
    // does not bleed into the higher-order bits of the result.
    return ((bytes[offset] & 0xFF) << 24) | ((bytes[offset + 1] & 0xFF) << 16)
        | ((bytes[offset + 2] & 0xFF) << 8) | (bytes[offset + 3] & 0xFF);
  }
}
|
|
@ -0,0 +1,52 @@
|
|||
package org.apache.lucene.analysis.payloads;
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
|
||||
import org.apache.lucene.analysis.Token;
|
||||
import org.apache.lucene.analysis.TokenFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.index.Payload;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
|
||||
/**
|
||||
* Adds the {@link org.apache.lucene.analysis.Token#setStartOffset(int)}
|
||||
* and {@link org.apache.lucene.analysis.Token#setEndOffset(int)}
|
||||
* First 4 bytes are the start
|
||||
*
|
||||
**/
|
||||
public class TokenOffsetPayloadTokenFilter extends TokenFilter {
|
||||
|
||||
|
||||
public TokenOffsetPayloadTokenFilter(TokenStream input) {
|
||||
super(input);
|
||||
}
|
||||
|
||||
public Token next(Token result) throws IOException {
|
||||
result = input.next(result);
|
||||
if (result != null){
|
||||
byte[] data = new byte[8];
|
||||
PayloadHelper.encodeInt(result.startOffset(), data, 0);
|
||||
PayloadHelper.encodeInt(result.endOffset(), data, 4);
|
||||
Payload payload = new Payload(data);
|
||||
result.setPayload(payload);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
}
|
|
@ -17,9 +17,9 @@ package org.apache.lucene.analysis.payloads;
|
|||
*/
|
||||
|
||||
import junit.framework.TestCase;
|
||||
import org.apache.lucene.analysis.Token;
|
||||
import org.apache.lucene.analysis.TokenFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.Token;
|
||||
import org.apache.lucene.analysis.WhitespaceTokenizer;
|
||||
|
||||
import java.io.IOException;
|
||||
|
@ -53,7 +53,7 @@ public class NumericPayloadTokenFilterTest extends TestCase {
|
|||
byte [] bytes = tok.getPayload().getData();//safe here to just use the bytes, otherwise we should use offset, length
|
||||
assertTrue(bytes.length + " does not equal: " + tok.getPayload().length(), bytes.length == tok.getPayload().length());
|
||||
assertTrue(tok.getPayload().getOffset() + " does not equal: " + 0, tok.getPayload().getOffset() == 0);
|
||||
float pay = NumericPayloadTokenFilter.decodePayload(bytes);
|
||||
float pay = PayloadHelper.decodeFloat(bytes);
|
||||
assertTrue(pay + " does not equal: " + 3, pay == 3);
|
||||
} else {
|
||||
assertTrue(tok.type() + " is not null and it should be", tok.type().equals("word"));
|
||||
|
|
|
@ -0,0 +1,63 @@
|
|||
package org.apache.lucene.analysis.payloads;
|
||||
|
||||
/**
|
||||
* Copyright 2004 The Apache Software Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import junit.framework.TestCase;
|
||||
import org.apache.lucene.analysis.Token;
|
||||
import org.apache.lucene.analysis.WhitespaceTokenizer;
|
||||
import org.apache.lucene.index.Payload;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.StringReader;
|
||||
|
||||
public class TokenOffsetPayloadTokenFilterTest extends TestCase {
|
||||
|
||||
|
||||
public TokenOffsetPayloadTokenFilterTest(String s) {
|
||||
super(s);
|
||||
}
|
||||
|
||||
protected void setUp() {
|
||||
}
|
||||
|
||||
protected void tearDown() {
|
||||
|
||||
}
|
||||
|
||||
public void test() throws IOException {
|
||||
String test = "The quick red fox jumped over the lazy brown dogs";
|
||||
|
||||
TokenOffsetPayloadTokenFilter nptf = new TokenOffsetPayloadTokenFilter(new WhitespaceTokenizer(new StringReader(test)));
|
||||
Token tok = new Token();
|
||||
int count = 0;
|
||||
while ((tok = nptf.next(tok)) != null){
|
||||
assertTrue("tok is null and it shouldn't be", tok != null);
|
||||
Payload pay = tok.getPayload();
|
||||
assertTrue("pay is null and it shouldn't be", pay != null);
|
||||
byte [] data = pay.getData();
|
||||
int start = PayloadHelper.decodeInt(data, 0);
|
||||
assertTrue(start + " does not equal: " + tok.startOffset(), start == tok.startOffset());
|
||||
int end = PayloadHelper.decodeInt(data, 4);
|
||||
assertTrue(end + " does not equal: " + tok.endOffset(), end == tok.endOffset());
|
||||
count++;
|
||||
}
|
||||
assertTrue(count + " does not equal: " + 10, count == 10);
|
||||
|
||||
}
|
||||
|
||||
|
||||
}
|
Loading…
Reference in New Issue