diff --git a/contrib/CHANGES.txt b/contrib/CHANGES.txt index 178f587f025..200676bc03f 100644 --- a/contrib/CHANGES.txt +++ b/contrib/CHANGES.txt @@ -60,6 +60,9 @@ New features sentences properly. SmartChineseAnalyzer uses a Hidden Markov Model to tokenize Chinese words in a more intelligent way. (Xiaoping Gao via Mike McCandless) + + +6. LUCENE-1676: Added DelimitedPayloadTokenFilter class for automatically adding payloads "in-stream" (Grant Ingersoll) Optimizations diff --git a/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/AbstractEncoder.java b/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/AbstractEncoder.java new file mode 100644 index 00000000000..6b0533bae01 --- /dev/null +++ b/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/AbstractEncoder.java @@ -0,0 +1,14 @@ +package org.apache.lucene.analysis.payloads; + +import org.apache.lucene.index.Payload; + + +/** + * + * + **/ +public abstract class AbstractEncoder implements PayloadEncoder{ + public Payload encode(char[] buffer) { + return encode(buffer, 0, buffer.length); + } +} diff --git a/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilter.java b/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilter.java new file mode 100644 index 00000000000..f4931153506 --- /dev/null +++ b/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilter.java @@ -0,0 +1,109 @@ +package org.apache.lucene.analysis.payloads; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.analysis.TokenFilter; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.Token; +import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; +import org.apache.lucene.analysis.tokenattributes.TermAttribute; + +import java.io.IOException; + + +/** + * Characters before the delimiter are the "token", those after are the payload. + *

+ * For example, if the delimiter is '|', then for the string "foo|bar", foo is the token + * and "bar" is a payload. + *

+ * Note, you can also include a {@link org.apache.lucene.analysis.payloads.PayloadEncoder} to convert the payload in an appropriate way (from characters to bytes). + *

+ * Note make sure your Tokenizer doesn't split on the delimiter, or this won't work + * + * @see PayloadEncoder + */ +public class DelimitedPayloadTokenFilter extends TokenFilter { + public static final char DEFAULT_DELIMITER = '|'; + protected char delimiter = DEFAULT_DELIMITER; + protected TermAttribute termAtt; + protected PayloadAttribute payAtt; + protected PayloadEncoder encoder; + + /** + * Construct a token stream filtering the given input. + */ + protected DelimitedPayloadTokenFilter(TokenStream input) { + this(input, DEFAULT_DELIMITER, new IdentityEncoder()); + } + + + public DelimitedPayloadTokenFilter(TokenStream input, char delimiter, PayloadEncoder encoder) { + super(input); + termAtt = (TermAttribute) addAttribute(TermAttribute.class); + payAtt = (PayloadAttribute) addAttribute(PayloadAttribute.class); + this.delimiter = delimiter; + this.encoder = encoder; + } + + public boolean incrementToken() throws IOException { + boolean result = false; + if (input.incrementToken()) { + final char[] buffer = termAtt.termBuffer(); + final int length = termAtt.termLength(); + //look for the delimiter + boolean seen = false; + for (int i = 0; i < length; i++) { + if (buffer[i] == delimiter) { + termAtt.setTermBuffer(buffer, 0, i); + payAtt.setPayload(encoder.encode(buffer, i + 1, (length - (i + 1)))); + seen = true; + break;//at this point, we know the whole piece, so we can exit. If we don't see the delimiter, then the termAtt is the same + } + } + if (seen == false) { + //no delimiter + payAtt.setPayload(null); + } + result = true; + } + return result; + } + + + public Token next(Token reusableToken) throws IOException { + Token result = input.next(reusableToken); + if (result != null) { + final char[] buffer = result.termBuffer(); + final int length = result.termLength(); + boolean seen = false; + for (int i = 0; i < length; i++) { + if (buffer[i] == delimiter) { + result.setTermBuffer(buffer, 0, i); + result.setPayload(encoder.encode(buffer, i + 1, (length - (i + 1)))); + seen = true; + break;//at this point, we know the whole piece, so we can exit. If we don't see the delimiter, then the termAtt is the same + } + } + if (seen == false) { + //no delimiter + payAtt.setPayload(null); + } + } + return result; + } +} diff --git a/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/FloatEncoder.java b/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/FloatEncoder.java new file mode 100644 index 00000000000..5c6305879ee --- /dev/null +++ b/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/FloatEncoder.java @@ -0,0 +1,35 @@ +package org.apache.lucene.analysis.payloads; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.index.Payload; + + +/** + * Encode a character array Float as a {@link org.apache.lucene.index.Payload}. + * + **/ +public class FloatEncoder extends AbstractEncoder implements PayloadEncoder { + + public Payload encode(char[] buffer, int offset, int length) { + Payload result = new Payload(); + float payload = Float.parseFloat(new String(buffer, offset, length));//TODO: improve this so that we don't have to new Strings + byte[] bytes = PayloadHelper.encodeFloat(payload); + result.setData(bytes); + return result; + } +} diff --git a/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/IdentityEncoder.java b/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/IdentityEncoder.java new file mode 100644 index 00000000000..02bdda2b093 --- /dev/null +++ b/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/IdentityEncoder.java @@ -0,0 +1,57 @@ +package org.apache.lucene.analysis.payloads; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.index.Payload; + +import java.nio.charset.Charset; +import java.util.Arrays; +import java.io.UnsupportedEncodingException; + + +/** + * Does nothing other than convert the char array to a byte array using the specified encoding. + * + **/ +public class IdentityEncoder extends AbstractEncoder implements PayloadEncoder{ + + protected Charset charset = Charset.forName("UTF-8"); + protected String charsetName = "UTF-8"; //argh, stupid 1.4 + + public IdentityEncoder() { + } + + public IdentityEncoder(Charset charset) { + this.charset = charset; + charsetName = charset.name(); + } + + + public Payload encode(char[] buffer, int offset, int length) { + //what's the most efficient way to get a byte [] from a char[] array + //Do we have to go through String? + String tmp = new String(buffer, offset, length); + Payload result = null;//Can we avoid allocating by knowing where using the new API? + try { + result = new Payload(tmp.getBytes(charsetName)); + } catch (UnsupportedEncodingException e) { + //should never hit this, since we get the name from the Charset + } + + return result; + } +} diff --git a/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/IntegerEncoder.java b/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/IntegerEncoder.java new file mode 100644 index 00000000000..29841792204 --- /dev/null +++ b/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/IntegerEncoder.java @@ -0,0 +1,36 @@ +package org.apache.lucene.analysis.payloads; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.index.Payload; +import org.apache.lucene.util.ArrayUtil; + + +/** + * Encode a character array Integer as a {@link org.apache.lucene.index.Payload}. + * + **/ +public class IntegerEncoder extends AbstractEncoder implements PayloadEncoder { + + public Payload encode(char[] buffer, int offset, int length) { + Payload result = new Payload(); + int payload = ArrayUtil.parseInt(buffer, offset, length);//TODO: improve this so that we don't have to new Strings + byte[] bytes = PayloadHelper.encodeInt(payload); + result.setData(bytes); + return result; + } +} \ No newline at end of file diff --git a/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/PayloadEncoder.java b/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/PayloadEncoder.java new file mode 100644 index 00000000000..d88cc1c2e08 --- /dev/null +++ b/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/PayloadEncoder.java @@ -0,0 +1,40 @@ +package org.apache.lucene.analysis.payloads; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.index.Payload; + + +/** + * Mainly for use with the DelimitedPayloadTokenFilter, converts char buffers to Payload + *

+ * NOTE: This interface is subject to change + * + **/ +public interface PayloadEncoder { + + Payload encode(char[] buffer); + + /** + * Convert a char array to a {@link org.apache.lucene.index.Payload} + * @param buffer + * @param offset + * @param length + * @return + */ + Payload encode(char [] buffer, int offset, int length); +} diff --git a/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/PayloadHelper.java b/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/PayloadHelper.java index b15020267c0..31684d567c6 100644 --- a/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/PayloadHelper.java +++ b/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/PayloadHelper.java @@ -31,6 +31,10 @@ public class PayloadHelper { return encodeInt(Float.floatToIntBits(payload), data, offset); } + public static byte[] encodeInt(int payload){ + return encodeInt(payload, new byte[4], 0); + } + public static byte[] encodeInt(int payload, byte[] data, int offset){ data[offset] = (byte)(payload >> 24); data[offset + 1] = (byte)(payload >> 16); diff --git a/contrib/analyzers/src/test/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilterTest.java b/contrib/analyzers/src/test/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilterTest.java new file mode 100644 index 00000000000..4ef09ab1d61 --- /dev/null +++ b/contrib/analyzers/src/test/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilterTest.java @@ -0,0 +1,139 @@ +package org.apache.lucene.analysis.payloads; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.WhitespaceTokenizer; +import org.apache.lucene.analysis.Token; +import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; +import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.index.Payload; +import org.apache.lucene.util.LuceneTestCase; + +import java.io.StringReader; + + +/** + * + * + **/ +public class DelimitedPayloadTokenFilterTest extends LuceneTestCase { + + public void testPayloads() throws Exception { + String test = "The quick|JJ red|JJ fox|NN jumped|VB over the lazy|JJ brown|JJ dogs|NN"; + DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter(new WhitespaceTokenizer(new StringReader(test))); + TermAttribute termAtt = (TermAttribute) filter.getAttribute(TermAttribute.class); + PayloadAttribute payAtt = (PayloadAttribute) filter.getAttribute(PayloadAttribute.class); + assertTermEquals("The", filter, termAtt, payAtt, null); + assertTermEquals("quick", filter, termAtt, payAtt, "JJ".getBytes("UTF-8")); + assertTermEquals("red", filter, termAtt, payAtt, "JJ".getBytes("UTF-8")); + assertTermEquals("fox", filter, termAtt, payAtt, "NN".getBytes("UTF-8")); + assertTermEquals("jumped", filter, termAtt, payAtt, "VB".getBytes("UTF-8")); + assertTermEquals("over", filter, termAtt, payAtt, null); + assertTermEquals("the", filter, termAtt, payAtt, null); + assertTermEquals("lazy", filter, termAtt, payAtt, "JJ".getBytes("UTF-8")); + assertTermEquals("brown", filter, termAtt, payAtt, "JJ".getBytes("UTF-8")); + assertTermEquals("dogs", filter, termAtt, payAtt, "NN".getBytes("UTF-8")); + assertFalse(filter.incrementToken()); + } + + public void testNext() throws Exception { + + String test = "The quick|JJ red|JJ fox|NN jumped|VB over the lazy|JJ brown|JJ dogs|NN"; + DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter(new WhitespaceTokenizer(new StringReader(test))); + assertTermEquals("The", filter, null); + assertTermEquals("quick", filter, "JJ".getBytes("UTF-8")); + assertTermEquals("red", filter, "JJ".getBytes("UTF-8")); + assertTermEquals("fox", filter, "NN".getBytes("UTF-8")); + assertTermEquals("jumped", filter, "VB".getBytes("UTF-8")); + assertTermEquals("over", filter, null); + assertTermEquals("the", filter, null); + assertTermEquals("lazy", filter, "JJ".getBytes("UTF-8")); + assertTermEquals("brown", filter, "JJ".getBytes("UTF-8")); + assertTermEquals("dogs", filter, "NN".getBytes("UTF-8")); + assertTrue(filter.next(new Token()) == null); + } + + + public void testFloatEncoding() throws Exception { + String test = "The quick|1.0 red|2.0 fox|3.5 jumped|0.5 over the lazy|5 brown|99.3 dogs|83.7"; + DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter(new WhitespaceTokenizer(new StringReader(test)), '|', new FloatEncoder()); + TermAttribute termAtt = (TermAttribute) filter.getAttribute(TermAttribute.class); + PayloadAttribute payAtt = (PayloadAttribute) filter.getAttribute(PayloadAttribute.class); + assertTermEquals("The", filter, termAtt, payAtt, null); + assertTermEquals("quick", filter, termAtt, payAtt, PayloadHelper.encodeFloat(1.0f)); + assertTermEquals("red", filter, termAtt, payAtt, PayloadHelper.encodeFloat(2.0f)); + assertTermEquals("fox", filter, termAtt, payAtt, PayloadHelper.encodeFloat(3.5f)); + assertTermEquals("jumped", filter, termAtt, payAtt, PayloadHelper.encodeFloat(0.5f)); + assertTermEquals("over", filter, termAtt, payAtt, null); + assertTermEquals("the", filter, termAtt, payAtt, null); + assertTermEquals("lazy", filter, termAtt, payAtt, PayloadHelper.encodeFloat(5.0f)); + assertTermEquals("brown", filter, termAtt, payAtt, PayloadHelper.encodeFloat(99.3f)); + assertTermEquals("dogs", filter, termAtt, payAtt, PayloadHelper.encodeFloat(83.7f)); + assertFalse(filter.incrementToken()); + } + + public void testIntEncoding() throws Exception { + String test = "The quick|1 red|2 fox|3 jumped over the lazy|5 brown|99 dogs|83"; + DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter(new WhitespaceTokenizer(new StringReader(test)), '|', new IntegerEncoder()); + TermAttribute termAtt = (TermAttribute) filter.getAttribute(TermAttribute.class); + PayloadAttribute payAtt = (PayloadAttribute) filter.getAttribute(PayloadAttribute.class); + assertTermEquals("The", filter, termAtt, payAtt, null); + assertTermEquals("quick", filter, termAtt, payAtt, PayloadHelper.encodeInt(1)); + assertTermEquals("red", filter, termAtt, payAtt, PayloadHelper.encodeInt(2)); + assertTermEquals("fox", filter, termAtt, payAtt, PayloadHelper.encodeInt(3)); + assertTermEquals("jumped", filter, termAtt, payAtt, null); + assertTermEquals("over", filter, termAtt, payAtt, null); + assertTermEquals("the", filter, termAtt, payAtt, null); + assertTermEquals("lazy", filter, termAtt, payAtt, PayloadHelper.encodeInt(5)); + assertTermEquals("brown", filter, termAtt, payAtt, PayloadHelper.encodeInt(99)); + assertTermEquals("dogs", filter, termAtt, payAtt, PayloadHelper.encodeInt(83)); + assertFalse(filter.incrementToken()); + } + + void assertTermEquals(String expected, TokenStream stream, byte[] expectPay) throws Exception { + Token tok = new Token(); + assertTrue(stream.next(tok) != null); + assertEquals(expected, tok.term()); + Payload payload = tok.getPayload(); + if (payload != null) { + assertTrue(payload.length() + " does not equal: " + expectPay.length, payload.length() == expectPay.length); + for (int i = 0; i < expectPay.length; i++) { + assertTrue(expectPay[i] + " does not equal: " + payload.byteAt(i), expectPay[i] == payload.byteAt(i)); + + } + } else { + assertTrue("expectPay is not null and it should be", expectPay == null); + } + } + + + void assertTermEquals(String expected, TokenStream stream, TermAttribute termAtt, PayloadAttribute payAtt, byte[] expectPay) throws Exception { + assertTrue(stream.incrementToken()); + assertEquals(expected, termAtt.term()); + Payload payload = payAtt.getPayload(); + if (payload != null) { + assertTrue(payload.length() + " does not equal: " + expectPay.length, payload.length() == expectPay.length); + for (int i = 0; i < expectPay.length; i++) { + assertTrue(expectPay[i] + " does not equal: " + payload.byteAt(i), expectPay[i] == payload.byteAt(i)); + + } + } else { + assertTrue("expectPay is not null and it should be", expectPay == null); + } + } +} diff --git a/src/java/org/apache/lucene/util/ArrayUtil.java b/src/java/org/apache/lucene/util/ArrayUtil.java index 4fe6b562b48..26a9c54dc13 100644 --- a/src/java/org/apache/lucene/util/ArrayUtil.java +++ b/src/java/org/apache/lucene/util/ArrayUtil.java @@ -7,9 +7,9 @@ package org.apache.lucene.util; * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

* Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -18,6 +18,107 @@ package org.apache.lucene.util; */ public final class ArrayUtil { + /* + Begin Apache Harmony code + + Revision taken on Friday, June 12. https://svn.apache.org/repos/asf/harmony/enhanced/classlib/archive/java6/modules/luni/src/main/java/java/lang/Integer.java + + */ + + /** + * Parses the string argument as if it was an int value and returns the + * result. Throws NumberFormatException if the string does not represent an + * int quantity. + * + * @param chars a string representation of an int quantity. + * @return int the value represented by the argument + * @throws NumberFormatException if the argument could not be parsed as an int quantity. + */ + public static int parseInt(char[] chars) throws NumberFormatException { + return parseInt(chars, 0, chars.length, 10); + } + + /** + * Parses a char array into an int. + * @param chars the character array + * @param offset The offset into the array + * @param len The length + * @return the int + * @throws NumberFormatException if it can't parse + */ + public static int parseInt(char[] chars, int offset, int len) throws NumberFormatException { + return parseInt(chars, offset, len, 10); + } + + /** + * Parses the string argument as if it was an int value and returns the + * result. Throws NumberFormatException if the string does not represent an + * int quantity. The second argument specifies the radix to use when parsing + * the value. + * + * @param chars a string representation of an int quantity. + * @param radix the base to use for conversion. + * @return int the value represented by the argument + * @throws NumberFormatException if the argument could not be parsed as an int quantity. + */ + public static int parseInt(char[] chars, int offset, int len, int radix) + throws NumberFormatException { + if (chars == null || radix < Character.MIN_RADIX + || radix > Character.MAX_RADIX) { + throw new NumberFormatException(); + } + int i = 0; + if (len == 0) { + throw new NumberFormatException("chars length is 0"); + } + boolean negative = chars[offset + i] == '-'; + if (negative && ++i == len) { + throw new NumberFormatException("can't convert to an int"); + } + if (negative == true){ + offset++; + len--; + } + return parse(chars, offset, len, radix, negative); + } + + + private static int parse(char[] chars, int offset, int len, int radix, + boolean negative) throws NumberFormatException { + int max = Integer.MIN_VALUE / radix; + int result = 0; + for (int i = 0; i < len; i++){ + int digit = Character.digit(chars[i + offset], radix); + if (digit == -1) { + throw new NumberFormatException("Unable to parse"); + } + if (max > result) { + throw new NumberFormatException("Unable to parse"); + } + int next = result * radix - digit; + if (next > result) { + throw new NumberFormatException("Unable to parse"); + } + result = next; + } + /*while (offset < len) { + + }*/ + if (!negative) { + result = -result; + if (result < 0) { + throw new NumberFormatException("Unable to parse"); + } + } + return result; + } + + + /* + + END APACHE HARMONY CODE + */ + public static int getNextSize(int targetSize) { /* This over-allocates proportional to the list size, making room @@ -35,7 +136,7 @@ public final class ArrayUtil { // Only reallocate if we are "substantially" smaller. // This saves us from "running hot" (constantly making a // bit bigger then a bit smaller, over and over): - if (newSize < currentSize/2) + if (newSize < currentSize / 2) return newSize; else return currentSize; @@ -51,7 +152,7 @@ public final class ArrayUtil { } public static int[] grow(int[] array) { - return grow(array, 1+array.length); + return grow(array, 1 + array.length); } public static int[] shrink(int[] array, int targetSize) { @@ -74,7 +175,7 @@ public final class ArrayUtil { } public static long[] grow(long[] array) { - return grow(array, 1+array.length); + return grow(array, 1 + array.length); } public static long[] shrink(long[] array, int targetSize) { @@ -97,7 +198,7 @@ public final class ArrayUtil { } public static byte[] grow(byte[] array) { - return grow(array, 1+array.length); + return grow(array, 1 + array.length); } public static byte[] shrink(byte[] array, int targetSize) { @@ -110,21 +211,25 @@ public final class ArrayUtil { return array; } - /** Returns hash of chars in range start (inclusive) to - * end (inclusive) */ + /** + * Returns hash of chars in range start (inclusive) to + * end (inclusive) + */ public static int hashCode(char[] array, int start, int end) { int code = 0; - for(int i=end-1;i>=start;i--) - code = code*31 + array[i]; + for (int i = end - 1; i >= start; i--) + code = code * 31 + array[i]; return code; } - /** Returns hash of chars in range start (inclusive) to - * end (inclusive) */ + /** + * Returns hash of chars in range start (inclusive) to + * end (inclusive) + */ public static int hashCode(byte[] array, int start, int end) { int code = 0; - for(int i=end-1;i>=start;i--) - code = code*31 + array[i]; + for (int i = end - 1; i >= start; i--) + code = code * 31 + array[i]; return code; } } diff --git a/src/test/org/apache/lucene/util/ArrayUtilTest.java b/src/test/org/apache/lucene/util/ArrayUtilTest.java new file mode 100644 index 00000000000..2c25ef1507e --- /dev/null +++ b/src/test/org/apache/lucene/util/ArrayUtilTest.java @@ -0,0 +1,57 @@ +package org.apache.lucene.util; + +import junit.framework.TestCase; + + +/** + * + * + **/ +public class ArrayUtilTest extends TestCase { + + public void testParseInt() throws Exception { + int test; + try { + test = ArrayUtil.parseInt("".toCharArray()); + assertTrue(false); + } catch (NumberFormatException e) { + //expected + } + try { + test = ArrayUtil.parseInt("foo".toCharArray()); + assertTrue(false); + } catch (NumberFormatException e) { + //expected + } + try { + test = ArrayUtil.parseInt(String.valueOf(Long.MAX_VALUE).toCharArray()); + assertTrue(false); + } catch (NumberFormatException e) { + //expected + } + try { + test = ArrayUtil.parseInt("0.34".toCharArray()); + assertTrue(false); + } catch (NumberFormatException e) { + //expected + } + + try { + test = ArrayUtil.parseInt("1".toCharArray()); + assertTrue(test + " does not equal: " + 1, test == 1); + test = ArrayUtil.parseInt("-10000".toCharArray()); + assertTrue(test + " does not equal: " + -10000, test == -10000); + test = ArrayUtil.parseInt("1923".toCharArray()); + assertTrue(test + " does not equal: " + 1923, test == 1923); + test = ArrayUtil.parseInt("-1".toCharArray()); + assertTrue(test + " does not equal: " + -1, test == -1); + test = ArrayUtil.parseInt("foo 1923 bar".toCharArray(), 4, 4); + assertTrue(test + " does not equal: " + 1923, test == 1923); + } catch (NumberFormatException e) { + e.printStackTrace(); + assertTrue(false); + } + + } + +}