mirror of https://github.com/apache/lucene.git
LUCENE-1077: refactored to have a common PayloadHelper classes. Also added TokenOffsetPayloadTokenFilter, which encodes the Token offset into the payloads
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@604870 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
b7e167ac8d
commit
55d0c3a2f8
|
@ -17,9 +17,9 @@ package org.apache.lucene.analysis.payloads;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.Token;
|
||||||
import org.apache.lucene.analysis.TokenFilter;
|
import org.apache.lucene.analysis.TokenFilter;
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.analysis.Token;
|
|
||||||
import org.apache.lucene.index.Payload;
|
import org.apache.lucene.index.Payload;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
@ -37,44 +37,10 @@ public class NumericPayloadTokenFilter extends TokenFilter {
|
||||||
public NumericPayloadTokenFilter(TokenStream input, float payload, String typeMatch) {
|
public NumericPayloadTokenFilter(TokenStream input, float payload, String typeMatch) {
|
||||||
super(input);
|
super(input);
|
||||||
//Need to encode the payload
|
//Need to encode the payload
|
||||||
thePayload = new Payload(encodePayload(payload));
|
thePayload = new Payload(PayloadHelper.encodeFloat(payload));
|
||||||
this.typeMatch = typeMatch;
|
this.typeMatch = typeMatch;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static byte[] encodePayload(float payload) {
|
|
||||||
byte[] result = new byte[4];
|
|
||||||
int tmp = Float.floatToIntBits(payload);
|
|
||||||
result[0] = (byte)(tmp >> 24);
|
|
||||||
result[1] = (byte)(tmp >> 16);
|
|
||||||
result[2] = (byte)(tmp >> 8);
|
|
||||||
result[3] = (byte) tmp;
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @see #decodePayload(byte[], int)
|
|
||||||
* @see #encodePayload(float)
|
|
||||||
*/
|
|
||||||
public static float decodePayload(byte [] bytes){
|
|
||||||
return decodePayload(bytes, 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Decode the payload that was encoded using {@link #encodePayload(float)}.
|
|
||||||
* NOTE: the length of the array must be at least offset + 4 long.
|
|
||||||
* @param bytes The bytes to decode
|
|
||||||
* @param offset The offset into the array.
|
|
||||||
* @return The float that was encoded
|
|
||||||
*
|
|
||||||
* @see #encodePayload(float)
|
|
||||||
*/
|
|
||||||
public static final float decodePayload(byte [] bytes, int offset){
|
|
||||||
int tmp = ((bytes[offset] & 0xFF) << 24) | ((bytes[offset + 1] & 0xFF) << 16)
|
|
||||||
| ((bytes[offset + 2] & 0xFF) << 8) | (bytes[offset + 3] & 0xFF);
|
|
||||||
return Float.intBitsToFloat(tmp);
|
|
||||||
}
|
|
||||||
|
|
||||||
public Token next(Token result) throws IOException {
|
public Token next(Token result) throws IOException {
|
||||||
result = input.next(result);
|
result = input.next(result);
|
||||||
if (result != null && result.type().equals(typeMatch)){
|
if (result != null && result.type().equals(typeMatch)){
|
||||||
|
|
|
@ -0,0 +1,70 @@
|
||||||
|
package org.apache.lucene.analysis.payloads;
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
*
|
||||||
|
**/
|
||||||
|
public class PayloadHelper {
|
||||||
|
|
||||||
|
public static byte[] encodeFloat(float payload) {
|
||||||
|
return encodeFloat(payload, new byte[4], 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static byte[] encodeFloat(float payload, byte[] data, int offset){
|
||||||
|
return encodeInt(Float.floatToIntBits(payload), data, offset);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static byte[] encodeInt(int payload, byte[] data, int offset){
|
||||||
|
data[offset] = (byte)(payload >> 24);
|
||||||
|
data[offset + 1] = (byte)(payload >> 16);
|
||||||
|
data[offset + 2] = (byte)(payload >> 8);
|
||||||
|
data[offset + 3] = (byte) payload;
|
||||||
|
return data;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param bytes
|
||||||
|
* @see #decodeFloat(byte[], int)
|
||||||
|
* @see #encodeFloat(float)
|
||||||
|
* @return the decoded float
|
||||||
|
*/
|
||||||
|
public static float decodeFloat(byte [] bytes){
|
||||||
|
return decodeFloat(bytes, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Decode the payload that was encoded using {@link #encodeFloat(float)}.
|
||||||
|
* NOTE: the length of the array must be at least offset + 4 long.
|
||||||
|
* @param bytes The bytes to decode
|
||||||
|
* @param offset The offset into the array.
|
||||||
|
* @return The float that was encoded
|
||||||
|
*
|
||||||
|
* @see # encodeFloat (float)
|
||||||
|
*/
|
||||||
|
public static final float decodeFloat(byte [] bytes, int offset){
|
||||||
|
|
||||||
|
return Float.intBitsToFloat(decodeInt(bytes, offset));
|
||||||
|
}
|
||||||
|
|
||||||
|
public static final int decodeInt(byte [] bytes, int offset){
|
||||||
|
return ((bytes[offset] & 0xFF) << 24) | ((bytes[offset + 1] & 0xFF) << 16)
|
||||||
|
| ((bytes[offset + 2] & 0xFF) << 8) | (bytes[offset + 3] & 0xFF);
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,52 @@
|
||||||
|
package org.apache.lucene.analysis.payloads;
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.Token;
|
||||||
|
import org.apache.lucene.analysis.TokenFilter;
|
||||||
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
|
import org.apache.lucene.index.Payload;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Adds the {@link org.apache.lucene.analysis.Token#setStartOffset(int)}
|
||||||
|
* and {@link org.apache.lucene.analysis.Token#setEndOffset(int)}
|
||||||
|
* First 4 bytes are the start
|
||||||
|
*
|
||||||
|
**/
|
||||||
|
public class TokenOffsetPayloadTokenFilter extends TokenFilter {
|
||||||
|
|
||||||
|
|
||||||
|
public TokenOffsetPayloadTokenFilter(TokenStream input) {
|
||||||
|
super(input);
|
||||||
|
}
|
||||||
|
|
||||||
|
public Token next(Token result) throws IOException {
|
||||||
|
result = input.next(result);
|
||||||
|
if (result != null){
|
||||||
|
byte[] data = new byte[8];
|
||||||
|
PayloadHelper.encodeInt(result.startOffset(), data, 0);
|
||||||
|
PayloadHelper.encodeInt(result.endOffset(), data, 4);
|
||||||
|
Payload payload = new Payload(data);
|
||||||
|
result.setPayload(payload);
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
}
|
|
@ -17,9 +17,9 @@ package org.apache.lucene.analysis.payloads;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import junit.framework.TestCase;
|
import junit.framework.TestCase;
|
||||||
|
import org.apache.lucene.analysis.Token;
|
||||||
import org.apache.lucene.analysis.TokenFilter;
|
import org.apache.lucene.analysis.TokenFilter;
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.analysis.Token;
|
|
||||||
import org.apache.lucene.analysis.WhitespaceTokenizer;
|
import org.apache.lucene.analysis.WhitespaceTokenizer;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
@ -53,7 +53,7 @@ public class NumericPayloadTokenFilterTest extends TestCase {
|
||||||
byte [] bytes = tok.getPayload().getData();//safe here to just use the bytes, otherwise we should use offset, length
|
byte [] bytes = tok.getPayload().getData();//safe here to just use the bytes, otherwise we should use offset, length
|
||||||
assertTrue(bytes.length + " does not equal: " + tok.getPayload().length(), bytes.length == tok.getPayload().length());
|
assertTrue(bytes.length + " does not equal: " + tok.getPayload().length(), bytes.length == tok.getPayload().length());
|
||||||
assertTrue(tok.getPayload().getOffset() + " does not equal: " + 0, tok.getPayload().getOffset() == 0);
|
assertTrue(tok.getPayload().getOffset() + " does not equal: " + 0, tok.getPayload().getOffset() == 0);
|
||||||
float pay = NumericPayloadTokenFilter.decodePayload(bytes);
|
float pay = PayloadHelper.decodeFloat(bytes);
|
||||||
assertTrue(pay + " does not equal: " + 3, pay == 3);
|
assertTrue(pay + " does not equal: " + 3, pay == 3);
|
||||||
} else {
|
} else {
|
||||||
assertTrue(tok.type() + " is not null and it should be", tok.type().equals("word"));
|
assertTrue(tok.type() + " is not null and it should be", tok.type().equals("word"));
|
||||||
|
|
|
@ -0,0 +1,63 @@
|
||||||
|
package org.apache.lucene.analysis.payloads;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Copyright 2004 The Apache Software Foundation
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import junit.framework.TestCase;
|
||||||
|
import org.apache.lucene.analysis.Token;
|
||||||
|
import org.apache.lucene.analysis.WhitespaceTokenizer;
|
||||||
|
import org.apache.lucene.index.Payload;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.StringReader;
|
||||||
|
|
||||||
|
public class TokenOffsetPayloadTokenFilterTest extends TestCase {
|
||||||
|
|
||||||
|
|
||||||
|
public TokenOffsetPayloadTokenFilterTest(String s) {
|
||||||
|
super(s);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected void setUp() {
|
||||||
|
}
|
||||||
|
|
||||||
|
protected void tearDown() {
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
public void test() throws IOException {
|
||||||
|
String test = "The quick red fox jumped over the lazy brown dogs";
|
||||||
|
|
||||||
|
TokenOffsetPayloadTokenFilter nptf = new TokenOffsetPayloadTokenFilter(new WhitespaceTokenizer(new StringReader(test)));
|
||||||
|
Token tok = new Token();
|
||||||
|
int count = 0;
|
||||||
|
while ((tok = nptf.next(tok)) != null){
|
||||||
|
assertTrue("tok is null and it shouldn't be", tok != null);
|
||||||
|
Payload pay = tok.getPayload();
|
||||||
|
assertTrue("pay is null and it shouldn't be", pay != null);
|
||||||
|
byte [] data = pay.getData();
|
||||||
|
int start = PayloadHelper.decodeInt(data, 0);
|
||||||
|
assertTrue(start + " does not equal: " + tok.startOffset(), start == tok.startOffset());
|
||||||
|
int end = PayloadHelper.decodeInt(data, 4);
|
||||||
|
assertTrue(end + " does not equal: " + tok.endOffset(), end == tok.endOffset());
|
||||||
|
count++;
|
||||||
|
}
|
||||||
|
assertTrue(count + " does not equal: " + 10, count == 10);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
Loading…
Reference in New Issue