mirror of https://github.com/apache/lucene.git
LUCENE-1676: in-stream payload support
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@784297 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
ad94a3d47a
commit
1511ec5e31
|
@ -61,6 +61,9 @@ New features
|
||||||
Model to tokenize Chinese words in a more intelligent way.
|
Model to tokenize Chinese words in a more intelligent way.
|
||||||
(Xiaoping Gao via Mike McCandless)
|
(Xiaoping Gao via Mike McCandless)
|
||||||
|
|
||||||
|
|
||||||
|
6. LUCENE-1676: Added DelimitedPayloadTokenFilter class for automatically adding payloads "in-stream" (Grant Ingersoll)
|
||||||
|
|
||||||
Optimizations
|
Optimizations
|
||||||
|
|
||||||
1. LUCENE-1643: Re-use the collation key (RawCollationKey) for
|
1. LUCENE-1643: Re-use the collation key (RawCollationKey) for
|
||||||
|
|
|
@ -0,0 +1,14 @@
|
||||||
|
package org.apache.lucene.analysis.payloads;
|
||||||
|
|
||||||
|
import org.apache.lucene.index.Payload;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
*
|
||||||
|
**/
|
||||||
|
public abstract class AbstractEncoder implements PayloadEncoder{
|
||||||
|
public Payload encode(char[] buffer) {
|
||||||
|
return encode(buffer, 0, buffer.length);
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,109 @@
|
||||||
|
package org.apache.lucene.analysis.payloads;
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.TokenFilter;
|
||||||
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
|
import org.apache.lucene.analysis.Token;
|
||||||
|
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
|
||||||
|
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Characters before the delimiter are the "token", those after are the payload.
|
||||||
|
* <p/>
|
||||||
|
* For example, if the delimiter is '|', then for the string "foo|bar", foo is the token
|
||||||
|
* and "bar" is a payload.
|
||||||
|
* <p/>
|
||||||
|
* Note, you can also include a {@link org.apache.lucene.analysis.payloads.PayloadEncoder} to convert the payload in an appropriate way (from characters to bytes).
|
||||||
|
* <p/>
|
||||||
|
* Note make sure your Tokenizer doesn't split on the delimiter, or this won't work
|
||||||
|
*
|
||||||
|
* @see PayloadEncoder
|
||||||
|
*/
|
||||||
|
public class DelimitedPayloadTokenFilter extends TokenFilter {
|
||||||
|
public static final char DEFAULT_DELIMITER = '|';
|
||||||
|
protected char delimiter = DEFAULT_DELIMITER;
|
||||||
|
protected TermAttribute termAtt;
|
||||||
|
protected PayloadAttribute payAtt;
|
||||||
|
protected PayloadEncoder encoder;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Construct a token stream filtering the given input.
|
||||||
|
*/
|
||||||
|
protected DelimitedPayloadTokenFilter(TokenStream input) {
|
||||||
|
this(input, DEFAULT_DELIMITER, new IdentityEncoder());
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public DelimitedPayloadTokenFilter(TokenStream input, char delimiter, PayloadEncoder encoder) {
|
||||||
|
super(input);
|
||||||
|
termAtt = (TermAttribute) addAttribute(TermAttribute.class);
|
||||||
|
payAtt = (PayloadAttribute) addAttribute(PayloadAttribute.class);
|
||||||
|
this.delimiter = delimiter;
|
||||||
|
this.encoder = encoder;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean incrementToken() throws IOException {
|
||||||
|
boolean result = false;
|
||||||
|
if (input.incrementToken()) {
|
||||||
|
final char[] buffer = termAtt.termBuffer();
|
||||||
|
final int length = termAtt.termLength();
|
||||||
|
//look for the delimiter
|
||||||
|
boolean seen = false;
|
||||||
|
for (int i = 0; i < length; i++) {
|
||||||
|
if (buffer[i] == delimiter) {
|
||||||
|
termAtt.setTermBuffer(buffer, 0, i);
|
||||||
|
payAtt.setPayload(encoder.encode(buffer, i + 1, (length - (i + 1))));
|
||||||
|
seen = true;
|
||||||
|
break;//at this point, we know the whole piece, so we can exit. If we don't see the delimiter, then the termAtt is the same
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (seen == false) {
|
||||||
|
//no delimiter
|
||||||
|
payAtt.setPayload(null);
|
||||||
|
}
|
||||||
|
result = true;
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public Token next(Token reusableToken) throws IOException {
|
||||||
|
Token result = input.next(reusableToken);
|
||||||
|
if (result != null) {
|
||||||
|
final char[] buffer = result.termBuffer();
|
||||||
|
final int length = result.termLength();
|
||||||
|
boolean seen = false;
|
||||||
|
for (int i = 0; i < length; i++) {
|
||||||
|
if (buffer[i] == delimiter) {
|
||||||
|
result.setTermBuffer(buffer, 0, i);
|
||||||
|
result.setPayload(encoder.encode(buffer, i + 1, (length - (i + 1))));
|
||||||
|
seen = true;
|
||||||
|
break;//at this point, we know the whole piece, so we can exit. If we don't see the delimiter, then the termAtt is the same
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (seen == false) {
|
||||||
|
//no delimiter
|
||||||
|
payAtt.setPayload(null);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,35 @@
|
||||||
|
package org.apache.lucene.analysis.payloads;
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import org.apache.lucene.index.Payload;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Encode a character array Float as a {@link org.apache.lucene.index.Payload}.
|
||||||
|
*
|
||||||
|
**/
|
||||||
|
public class FloatEncoder extends AbstractEncoder implements PayloadEncoder {
|
||||||
|
|
||||||
|
public Payload encode(char[] buffer, int offset, int length) {
|
||||||
|
Payload result = new Payload();
|
||||||
|
float payload = Float.parseFloat(new String(buffer, offset, length));//TODO: improve this so that we don't have to new Strings
|
||||||
|
byte[] bytes = PayloadHelper.encodeFloat(payload);
|
||||||
|
result.setData(bytes);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,57 @@
|
||||||
|
package org.apache.lucene.analysis.payloads;
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import org.apache.lucene.index.Payload;
|
||||||
|
|
||||||
|
import java.nio.charset.Charset;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.io.UnsupportedEncodingException;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Does nothing other than convert the char array to a byte array using the specified encoding.
|
||||||
|
*
|
||||||
|
**/
|
||||||
|
public class IdentityEncoder extends AbstractEncoder implements PayloadEncoder{
|
||||||
|
|
||||||
|
protected Charset charset = Charset.forName("UTF-8");
|
||||||
|
protected String charsetName = "UTF-8"; //argh, stupid 1.4
|
||||||
|
|
||||||
|
public IdentityEncoder() {
|
||||||
|
}
|
||||||
|
|
||||||
|
public IdentityEncoder(Charset charset) {
|
||||||
|
this.charset = charset;
|
||||||
|
charsetName = charset.name();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public Payload encode(char[] buffer, int offset, int length) {
|
||||||
|
//what's the most efficient way to get a byte [] from a char[] array
|
||||||
|
//Do we have to go through String?
|
||||||
|
String tmp = new String(buffer, offset, length);
|
||||||
|
Payload result = null;//Can we avoid allocating by knowing where using the new API?
|
||||||
|
try {
|
||||||
|
result = new Payload(tmp.getBytes(charsetName));
|
||||||
|
} catch (UnsupportedEncodingException e) {
|
||||||
|
//should never hit this, since we get the name from the Charset
|
||||||
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,36 @@
|
||||||
|
package org.apache.lucene.analysis.payloads;
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import org.apache.lucene.index.Payload;
|
||||||
|
import org.apache.lucene.util.ArrayUtil;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Encode a character array Integer as a {@link org.apache.lucene.index.Payload}.
|
||||||
|
*
|
||||||
|
**/
|
||||||
|
public class IntegerEncoder extends AbstractEncoder implements PayloadEncoder {
|
||||||
|
|
||||||
|
public Payload encode(char[] buffer, int offset, int length) {
|
||||||
|
Payload result = new Payload();
|
||||||
|
int payload = ArrayUtil.parseInt(buffer, offset, length);//TODO: improve this so that we don't have to new Strings
|
||||||
|
byte[] bytes = PayloadHelper.encodeInt(payload);
|
||||||
|
result.setData(bytes);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,40 @@
|
||||||
|
package org.apache.lucene.analysis.payloads;
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import org.apache.lucene.index.Payload;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Mainly for use with the DelimitedPayloadTokenFilter, converts char buffers to Payload
|
||||||
|
* <p/>
|
||||||
|
* NOTE: This interface is subject to change
|
||||||
|
*
|
||||||
|
**/
|
||||||
|
public interface PayloadEncoder {
|
||||||
|
|
||||||
|
Payload encode(char[] buffer);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Convert a char array to a {@link org.apache.lucene.index.Payload}
|
||||||
|
* @param buffer
|
||||||
|
* @param offset
|
||||||
|
* @param length
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
Payload encode(char [] buffer, int offset, int length);
|
||||||
|
}
|
|
@ -31,6 +31,10 @@ public class PayloadHelper {
|
||||||
return encodeInt(Float.floatToIntBits(payload), data, offset);
|
return encodeInt(Float.floatToIntBits(payload), data, offset);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static byte[] encodeInt(int payload){
|
||||||
|
return encodeInt(payload, new byte[4], 0);
|
||||||
|
}
|
||||||
|
|
||||||
public static byte[] encodeInt(int payload, byte[] data, int offset){
|
public static byte[] encodeInt(int payload, byte[] data, int offset){
|
||||||
data[offset] = (byte)(payload >> 24);
|
data[offset] = (byte)(payload >> 24);
|
||||||
data[offset + 1] = (byte)(payload >> 16);
|
data[offset + 1] = (byte)(payload >> 16);
|
||||||
|
|
|
@ -0,0 +1,139 @@
|
||||||
|
package org.apache.lucene.analysis.payloads;
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
|
import org.apache.lucene.analysis.WhitespaceTokenizer;
|
||||||
|
import org.apache.lucene.analysis.Token;
|
||||||
|
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
|
||||||
|
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
|
||||||
|
import org.apache.lucene.index.Payload;
|
||||||
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
|
|
||||||
|
import java.io.StringReader;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
*
|
||||||
|
**/
|
||||||
|
public class DelimitedPayloadTokenFilterTest extends LuceneTestCase {
|
||||||
|
|
||||||
|
public void testPayloads() throws Exception {
|
||||||
|
String test = "The quick|JJ red|JJ fox|NN jumped|VB over the lazy|JJ brown|JJ dogs|NN";
|
||||||
|
DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter(new WhitespaceTokenizer(new StringReader(test)));
|
||||||
|
TermAttribute termAtt = (TermAttribute) filter.getAttribute(TermAttribute.class);
|
||||||
|
PayloadAttribute payAtt = (PayloadAttribute) filter.getAttribute(PayloadAttribute.class);
|
||||||
|
assertTermEquals("The", filter, termAtt, payAtt, null);
|
||||||
|
assertTermEquals("quick", filter, termAtt, payAtt, "JJ".getBytes("UTF-8"));
|
||||||
|
assertTermEquals("red", filter, termAtt, payAtt, "JJ".getBytes("UTF-8"));
|
||||||
|
assertTermEquals("fox", filter, termAtt, payAtt, "NN".getBytes("UTF-8"));
|
||||||
|
assertTermEquals("jumped", filter, termAtt, payAtt, "VB".getBytes("UTF-8"));
|
||||||
|
assertTermEquals("over", filter, termAtt, payAtt, null);
|
||||||
|
assertTermEquals("the", filter, termAtt, payAtt, null);
|
||||||
|
assertTermEquals("lazy", filter, termAtt, payAtt, "JJ".getBytes("UTF-8"));
|
||||||
|
assertTermEquals("brown", filter, termAtt, payAtt, "JJ".getBytes("UTF-8"));
|
||||||
|
assertTermEquals("dogs", filter, termAtt, payAtt, "NN".getBytes("UTF-8"));
|
||||||
|
assertFalse(filter.incrementToken());
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testNext() throws Exception {
|
||||||
|
|
||||||
|
String test = "The quick|JJ red|JJ fox|NN jumped|VB over the lazy|JJ brown|JJ dogs|NN";
|
||||||
|
DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter(new WhitespaceTokenizer(new StringReader(test)));
|
||||||
|
assertTermEquals("The", filter, null);
|
||||||
|
assertTermEquals("quick", filter, "JJ".getBytes("UTF-8"));
|
||||||
|
assertTermEquals("red", filter, "JJ".getBytes("UTF-8"));
|
||||||
|
assertTermEquals("fox", filter, "NN".getBytes("UTF-8"));
|
||||||
|
assertTermEquals("jumped", filter, "VB".getBytes("UTF-8"));
|
||||||
|
assertTermEquals("over", filter, null);
|
||||||
|
assertTermEquals("the", filter, null);
|
||||||
|
assertTermEquals("lazy", filter, "JJ".getBytes("UTF-8"));
|
||||||
|
assertTermEquals("brown", filter, "JJ".getBytes("UTF-8"));
|
||||||
|
assertTermEquals("dogs", filter, "NN".getBytes("UTF-8"));
|
||||||
|
assertTrue(filter.next(new Token()) == null);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public void testFloatEncoding() throws Exception {
|
||||||
|
String test = "The quick|1.0 red|2.0 fox|3.5 jumped|0.5 over the lazy|5 brown|99.3 dogs|83.7";
|
||||||
|
DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter(new WhitespaceTokenizer(new StringReader(test)), '|', new FloatEncoder());
|
||||||
|
TermAttribute termAtt = (TermAttribute) filter.getAttribute(TermAttribute.class);
|
||||||
|
PayloadAttribute payAtt = (PayloadAttribute) filter.getAttribute(PayloadAttribute.class);
|
||||||
|
assertTermEquals("The", filter, termAtt, payAtt, null);
|
||||||
|
assertTermEquals("quick", filter, termAtt, payAtt, PayloadHelper.encodeFloat(1.0f));
|
||||||
|
assertTermEquals("red", filter, termAtt, payAtt, PayloadHelper.encodeFloat(2.0f));
|
||||||
|
assertTermEquals("fox", filter, termAtt, payAtt, PayloadHelper.encodeFloat(3.5f));
|
||||||
|
assertTermEquals("jumped", filter, termAtt, payAtt, PayloadHelper.encodeFloat(0.5f));
|
||||||
|
assertTermEquals("over", filter, termAtt, payAtt, null);
|
||||||
|
assertTermEquals("the", filter, termAtt, payAtt, null);
|
||||||
|
assertTermEquals("lazy", filter, termAtt, payAtt, PayloadHelper.encodeFloat(5.0f));
|
||||||
|
assertTermEquals("brown", filter, termAtt, payAtt, PayloadHelper.encodeFloat(99.3f));
|
||||||
|
assertTermEquals("dogs", filter, termAtt, payAtt, PayloadHelper.encodeFloat(83.7f));
|
||||||
|
assertFalse(filter.incrementToken());
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testIntEncoding() throws Exception {
|
||||||
|
String test = "The quick|1 red|2 fox|3 jumped over the lazy|5 brown|99 dogs|83";
|
||||||
|
DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter(new WhitespaceTokenizer(new StringReader(test)), '|', new IntegerEncoder());
|
||||||
|
TermAttribute termAtt = (TermAttribute) filter.getAttribute(TermAttribute.class);
|
||||||
|
PayloadAttribute payAtt = (PayloadAttribute) filter.getAttribute(PayloadAttribute.class);
|
||||||
|
assertTermEquals("The", filter, termAtt, payAtt, null);
|
||||||
|
assertTermEquals("quick", filter, termAtt, payAtt, PayloadHelper.encodeInt(1));
|
||||||
|
assertTermEquals("red", filter, termAtt, payAtt, PayloadHelper.encodeInt(2));
|
||||||
|
assertTermEquals("fox", filter, termAtt, payAtt, PayloadHelper.encodeInt(3));
|
||||||
|
assertTermEquals("jumped", filter, termAtt, payAtt, null);
|
||||||
|
assertTermEquals("over", filter, termAtt, payAtt, null);
|
||||||
|
assertTermEquals("the", filter, termAtt, payAtt, null);
|
||||||
|
assertTermEquals("lazy", filter, termAtt, payAtt, PayloadHelper.encodeInt(5));
|
||||||
|
assertTermEquals("brown", filter, termAtt, payAtt, PayloadHelper.encodeInt(99));
|
||||||
|
assertTermEquals("dogs", filter, termAtt, payAtt, PayloadHelper.encodeInt(83));
|
||||||
|
assertFalse(filter.incrementToken());
|
||||||
|
}
|
||||||
|
|
||||||
|
void assertTermEquals(String expected, TokenStream stream, byte[] expectPay) throws Exception {
|
||||||
|
Token tok = new Token();
|
||||||
|
assertTrue(stream.next(tok) != null);
|
||||||
|
assertEquals(expected, tok.term());
|
||||||
|
Payload payload = tok.getPayload();
|
||||||
|
if (payload != null) {
|
||||||
|
assertTrue(payload.length() + " does not equal: " + expectPay.length, payload.length() == expectPay.length);
|
||||||
|
for (int i = 0; i < expectPay.length; i++) {
|
||||||
|
assertTrue(expectPay[i] + " does not equal: " + payload.byteAt(i), expectPay[i] == payload.byteAt(i));
|
||||||
|
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
assertTrue("expectPay is not null and it should be", expectPay == null);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void assertTermEquals(String expected, TokenStream stream, TermAttribute termAtt, PayloadAttribute payAtt, byte[] expectPay) throws Exception {
|
||||||
|
assertTrue(stream.incrementToken());
|
||||||
|
assertEquals(expected, termAtt.term());
|
||||||
|
Payload payload = payAtt.getPayload();
|
||||||
|
if (payload != null) {
|
||||||
|
assertTrue(payload.length() + " does not equal: " + expectPay.length, payload.length() == expectPay.length);
|
||||||
|
for (int i = 0; i < expectPay.length; i++) {
|
||||||
|
assertTrue(expectPay[i] + " does not equal: " + payload.byteAt(i), expectPay[i] == payload.byteAt(i));
|
||||||
|
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
assertTrue("expectPay is not null and it should be", expectPay == null);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -7,9 +7,9 @@ package org.apache.lucene.util;
|
||||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
* (the "License"); you may not use this file except in compliance with
|
* (the "License"); you may not use this file except in compliance with
|
||||||
* the License. You may obtain a copy of the License at
|
* the License. You may obtain a copy of the License at
|
||||||
*
|
* <p/>
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
*
|
* <p/>
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
@ -18,6 +18,107 @@ package org.apache.lucene.util;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
public final class ArrayUtil {
|
public final class ArrayUtil {
|
||||||
|
/*
|
||||||
|
Begin Apache Harmony code
|
||||||
|
|
||||||
|
Revision taken on Friday, June 12. https://svn.apache.org/repos/asf/harmony/enhanced/classlib/archive/java6/modules/luni/src/main/java/java/lang/Integer.java
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Parses the string argument as if it was an int value and returns the
|
||||||
|
* result. Throws NumberFormatException if the string does not represent an
|
||||||
|
* int quantity.
|
||||||
|
*
|
||||||
|
* @param chars a string representation of an int quantity.
|
||||||
|
* @return int the value represented by the argument
|
||||||
|
* @throws NumberFormatException if the argument could not be parsed as an int quantity.
|
||||||
|
*/
|
||||||
|
public static int parseInt(char[] chars) throws NumberFormatException {
|
||||||
|
return parseInt(chars, 0, chars.length, 10);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Parses a char array into an int.
|
||||||
|
* @param chars the character array
|
||||||
|
* @param offset The offset into the array
|
||||||
|
* @param len The length
|
||||||
|
* @return the int
|
||||||
|
* @throws NumberFormatException if it can't parse
|
||||||
|
*/
|
||||||
|
public static int parseInt(char[] chars, int offset, int len) throws NumberFormatException {
|
||||||
|
return parseInt(chars, offset, len, 10);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Parses the string argument as if it was an int value and returns the
|
||||||
|
* result. Throws NumberFormatException if the string does not represent an
|
||||||
|
* int quantity. The second argument specifies the radix to use when parsing
|
||||||
|
* the value.
|
||||||
|
*
|
||||||
|
* @param chars a string representation of an int quantity.
|
||||||
|
* @param radix the base to use for conversion.
|
||||||
|
* @return int the value represented by the argument
|
||||||
|
* @throws NumberFormatException if the argument could not be parsed as an int quantity.
|
||||||
|
*/
|
||||||
|
public static int parseInt(char[] chars, int offset, int len, int radix)
|
||||||
|
throws NumberFormatException {
|
||||||
|
if (chars == null || radix < Character.MIN_RADIX
|
||||||
|
|| radix > Character.MAX_RADIX) {
|
||||||
|
throw new NumberFormatException();
|
||||||
|
}
|
||||||
|
int i = 0;
|
||||||
|
if (len == 0) {
|
||||||
|
throw new NumberFormatException("chars length is 0");
|
||||||
|
}
|
||||||
|
boolean negative = chars[offset + i] == '-';
|
||||||
|
if (negative && ++i == len) {
|
||||||
|
throw new NumberFormatException("can't convert to an int");
|
||||||
|
}
|
||||||
|
if (negative == true){
|
||||||
|
offset++;
|
||||||
|
len--;
|
||||||
|
}
|
||||||
|
return parse(chars, offset, len, radix, negative);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private static int parse(char[] chars, int offset, int len, int radix,
|
||||||
|
boolean negative) throws NumberFormatException {
|
||||||
|
int max = Integer.MIN_VALUE / radix;
|
||||||
|
int result = 0;
|
||||||
|
for (int i = 0; i < len; i++){
|
||||||
|
int digit = Character.digit(chars[i + offset], radix);
|
||||||
|
if (digit == -1) {
|
||||||
|
throw new NumberFormatException("Unable to parse");
|
||||||
|
}
|
||||||
|
if (max > result) {
|
||||||
|
throw new NumberFormatException("Unable to parse");
|
||||||
|
}
|
||||||
|
int next = result * radix - digit;
|
||||||
|
if (next > result) {
|
||||||
|
throw new NumberFormatException("Unable to parse");
|
||||||
|
}
|
||||||
|
result = next;
|
||||||
|
}
|
||||||
|
/*while (offset < len) {
|
||||||
|
|
||||||
|
}*/
|
||||||
|
if (!negative) {
|
||||||
|
result = -result;
|
||||||
|
if (result < 0) {
|
||||||
|
throw new NumberFormatException("Unable to parse");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
|
||||||
|
END APACHE HARMONY CODE
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
public static int getNextSize(int targetSize) {
|
public static int getNextSize(int targetSize) {
|
||||||
/* This over-allocates proportional to the list size, making room
|
/* This over-allocates proportional to the list size, making room
|
||||||
|
@ -35,7 +136,7 @@ public final class ArrayUtil {
|
||||||
// Only reallocate if we are "substantially" smaller.
|
// Only reallocate if we are "substantially" smaller.
|
||||||
// This saves us from "running hot" (constantly making a
|
// This saves us from "running hot" (constantly making a
|
||||||
// bit bigger then a bit smaller, over and over):
|
// bit bigger then a bit smaller, over and over):
|
||||||
if (newSize < currentSize/2)
|
if (newSize < currentSize / 2)
|
||||||
return newSize;
|
return newSize;
|
||||||
else
|
else
|
||||||
return currentSize;
|
return currentSize;
|
||||||
|
@ -51,7 +152,7 @@ public final class ArrayUtil {
|
||||||
}
|
}
|
||||||
|
|
||||||
public static int[] grow(int[] array) {
|
public static int[] grow(int[] array) {
|
||||||
return grow(array, 1+array.length);
|
return grow(array, 1 + array.length);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static int[] shrink(int[] array, int targetSize) {
|
public static int[] shrink(int[] array, int targetSize) {
|
||||||
|
@ -74,7 +175,7 @@ public final class ArrayUtil {
|
||||||
}
|
}
|
||||||
|
|
||||||
public static long[] grow(long[] array) {
|
public static long[] grow(long[] array) {
|
||||||
return grow(array, 1+array.length);
|
return grow(array, 1 + array.length);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static long[] shrink(long[] array, int targetSize) {
|
public static long[] shrink(long[] array, int targetSize) {
|
||||||
|
@ -97,7 +198,7 @@ public final class ArrayUtil {
|
||||||
}
|
}
|
||||||
|
|
||||||
public static byte[] grow(byte[] array) {
|
public static byte[] grow(byte[] array) {
|
||||||
return grow(array, 1+array.length);
|
return grow(array, 1 + array.length);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static byte[] shrink(byte[] array, int targetSize) {
|
public static byte[] shrink(byte[] array, int targetSize) {
|
||||||
|
@ -110,21 +211,25 @@ public final class ArrayUtil {
|
||||||
return array;
|
return array;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Returns hash of chars in range start (inclusive) to
|
/**
|
||||||
* end (inclusive) */
|
* Returns hash of chars in range start (inclusive) to
|
||||||
|
* end (inclusive)
|
||||||
|
*/
|
||||||
public static int hashCode(char[] array, int start, int end) {
|
public static int hashCode(char[] array, int start, int end) {
|
||||||
int code = 0;
|
int code = 0;
|
||||||
for(int i=end-1;i>=start;i--)
|
for (int i = end - 1; i >= start; i--)
|
||||||
code = code*31 + array[i];
|
code = code * 31 + array[i];
|
||||||
return code;
|
return code;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Returns hash of chars in range start (inclusive) to
|
/**
|
||||||
* end (inclusive) */
|
* Returns hash of chars in range start (inclusive) to
|
||||||
|
* end (inclusive)
|
||||||
|
*/
|
||||||
public static int hashCode(byte[] array, int start, int end) {
|
public static int hashCode(byte[] array, int start, int end) {
|
||||||
int code = 0;
|
int code = 0;
|
||||||
for(int i=end-1;i>=start;i--)
|
for (int i = end - 1; i >= start; i--)
|
||||||
code = code*31 + array[i];
|
code = code * 31 + array[i];
|
||||||
return code;
|
return code;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,57 @@
|
||||||
|
package org.apache.lucene.util;
|
||||||
|
|
||||||
|
import junit.framework.TestCase;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
*
|
||||||
|
**/
|
||||||
|
public class ArrayUtilTest extends TestCase {
|
||||||
|
|
||||||
|
public void testParseInt() throws Exception {
|
||||||
|
int test;
|
||||||
|
try {
|
||||||
|
test = ArrayUtil.parseInt("".toCharArray());
|
||||||
|
assertTrue(false);
|
||||||
|
} catch (NumberFormatException e) {
|
||||||
|
//expected
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
test = ArrayUtil.parseInt("foo".toCharArray());
|
||||||
|
assertTrue(false);
|
||||||
|
} catch (NumberFormatException e) {
|
||||||
|
//expected
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
test = ArrayUtil.parseInt(String.valueOf(Long.MAX_VALUE).toCharArray());
|
||||||
|
assertTrue(false);
|
||||||
|
} catch (NumberFormatException e) {
|
||||||
|
//expected
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
test = ArrayUtil.parseInt("0.34".toCharArray());
|
||||||
|
assertTrue(false);
|
||||||
|
} catch (NumberFormatException e) {
|
||||||
|
//expected
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
test = ArrayUtil.parseInt("1".toCharArray());
|
||||||
|
assertTrue(test + " does not equal: " + 1, test == 1);
|
||||||
|
test = ArrayUtil.parseInt("-10000".toCharArray());
|
||||||
|
assertTrue(test + " does not equal: " + -10000, test == -10000);
|
||||||
|
test = ArrayUtil.parseInt("1923".toCharArray());
|
||||||
|
assertTrue(test + " does not equal: " + 1923, test == 1923);
|
||||||
|
test = ArrayUtil.parseInt("-1".toCharArray());
|
||||||
|
assertTrue(test + " does not equal: " + -1, test == -1);
|
||||||
|
test = ArrayUtil.parseInt("foo 1923 bar".toCharArray(), 4, 4);
|
||||||
|
assertTrue(test + " does not equal: " + 1923, test == 1923);
|
||||||
|
} catch (NumberFormatException e) {
|
||||||
|
e.printStackTrace();
|
||||||
|
assertTrue(false);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
Loading…
Reference in New Issue