From 533fd2eebab5e3f37cb6cad4f020860aa2bcec41 Mon Sep 17 00:00:00 2001 From: Yonik Seeley Date: Thu, 24 Jan 2013 22:41:59 +0000 Subject: [PATCH] LUCENE-4690: Performance improvements and non-hashing versions of NumericUtils.*ToPrefixCoded git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1438242 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/CHANGES.txt | 3 + .../org/apache/lucene/util/NumericUtils.java | 78 ++++++++++++------- .../search/TestNumericRangeQuery32.java | 4 +- .../search/TestNumericRangeQuery64.java | 4 +- .../apache/lucene/util/TestNumericUtils.java | 12 +-- .../org/apache/solr/schema/TrieField.java | 29 ++++--- 6 files changed, 74 insertions(+), 56 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 34d8bd85975..154795cf1c1 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -58,6 +58,9 @@ Optimizations * LUCENE-3298: FST can now be larger than 2.1 GB / 2.1 B nodes. (James Dyer, Mike McCandless) +* LUCENE-4690: Performance improvements and non-hashing versions + of NumericUtils.*ToPrefixCoded() (yonik) + New Features * LUCENE-4686: New specialized DGapVInt8IntEncoder for facets (now the diff --git a/lucene/core/src/java/org/apache/lucene/util/NumericUtils.java b/lucene/core/src/java/org/apache/lucene/util/NumericUtils.java index 0815ecb73de..f4fcc632339 100644 --- a/lucene/core/src/java/org/apache/lucene/util/NumericUtils.java +++ b/lucene/core/src/java/org/apache/lucene/util/NumericUtils.java @@ -82,7 +82,7 @@ public final class NumericUtils { /** * The maximum term length (used for byte[] buffer size) * for encoding long values. - * @see #longToPrefixCoded(long,int,BytesRef) + * @see #longToPrefixCodedBytes */ public static final int BUF_SIZE_LONG = 63/7 + 2; @@ -95,7 +95,7 @@ public final class NumericUtils { /** * The maximum term length (used for byte[] buffer size) * for encoding int values. 
- * @see #intToPrefixCoded(int,int,BytesRef) + * @see #intToPrefixCodedBytes */ public static final int BUF_SIZE_INT = 31/7 + 2; @@ -109,15 +109,42 @@ public final class NumericUtils { * @return the hash code for indexing (TermsHash) */ public static int longToPrefixCoded(final long val, final int shift, final BytesRef bytes) { - if (shift>63 || shift<0) + longToPrefixCodedBytes(val, shift, bytes); + return bytes.hashCode(); + } + + /** + * Returns prefix coded bits after reducing the precision by shift bits. + * This method is used by {@link NumericTokenStream}. + * After encoding, {@code bytes.offset} will always be 0. + * @param val the numeric value + * @param shift how many bits to strip from the right + * @param bytes will contain the encoded value + * @return the hash code for indexing (TermsHash) + */ + public static int intToPrefixCoded(final int val, final int shift, final BytesRef bytes) { + intToPrefixCodedBytes(val, shift, bytes); + return bytes.hashCode(); + } + + /** + * Returns prefix coded bits after reducing the precision by shift bits. + * This method is used by {@link NumericTokenStream}. + * After encoding, {@code bytes.offset} will always be 0. 
+ * @param val the numeric value + * @param shift how many bits to strip from the right + * @param bytes will contain the encoded value + */ + public static void longToPrefixCodedBytes(final long val, final int shift, final BytesRef bytes) { + if ((shift & ~0x3f) != 0) // ensure shift is 0..63 throw new IllegalArgumentException("Illegal shift value, must be 0..63"); - int hash, nChars = (63-shift)/7 + 1; + int nChars = (((63-shift)*37)>>8) + 1; // i/7 is the same as (i*37)>>8 for i in 0..63 bytes.offset = 0; - bytes.length = nChars+1; + bytes.length = nChars+1; // one extra for the byte that contains the shift info if (bytes.bytes.length < bytes.length) { - bytes.grow(NumericUtils.BUF_SIZE_LONG); + bytes.bytes = new byte[NumericUtils.BUF_SIZE_LONG]; // use the max } - bytes.bytes[0] = (byte) (hash = (SHIFT_START_LONG + shift)); + bytes.bytes[0] = (byte)(SHIFT_START_LONG + shift); long sortableBits = val ^ 0x8000000000000000L; sortableBits >>>= shift; while (nChars > 0) { @@ -126,13 +153,9 @@ public final class NumericUtils { bytes.bytes[nChars--] = (byte)(sortableBits & 0x7f); sortableBits >>>= 7; } - // calculate hash - for (int i = 1; i < bytes.length; i++) { - hash = 31*hash + bytes.bytes[i]; - } - return hash; } + /** * Returns prefix coded bits after reducing the precision by shift bits. * This is method is used by {@link NumericTokenStream}. 
@@ -140,18 +163,17 @@ public final class NumericUtils { * @param val the numeric value * @param shift how many bits to strip from the right * @param bytes will contain the encoded value - * @return the hash code for indexing (TermsHash) */ - public static int intToPrefixCoded(final int val, final int shift, final BytesRef bytes) { - if (shift>31 || shift<0) + public static void intToPrefixCodedBytes(final int val, final int shift, final BytesRef bytes) { + if ((shift & ~0x1f) != 0) // ensure shift is 0..31 throw new IllegalArgumentException("Illegal shift value, must be 0..31"); - int hash, nChars = (31-shift)/7 + 1; + int nChars = (((31-shift)*37)>>8) + 1; // i/7 is the same as (i*37)>>8 for i in 0..63 bytes.offset = 0; - bytes.length = nChars+1; + bytes.length = nChars+1; // one extra for the byte that contains the shift info if (bytes.bytes.length < bytes.length) { - bytes.grow(NumericUtils.BUF_SIZE_INT); + bytes.bytes = new byte[NumericUtils.BUF_SIZE_LONG]; // use the max } - bytes.bytes[0] = (byte) (hash = (SHIFT_START_INT + shift)); + bytes.bytes[0] = (byte)(SHIFT_START_INT + shift); int sortableBits = val ^ 0x80000000; sortableBits >>>= shift; while (nChars > 0) { @@ -160,13 +182,9 @@ public final class NumericUtils { bytes.bytes[nChars--] = (byte)(sortableBits & 0x7f); sortableBits >>>= 7; } - // calculate hash - for (int i = 1; i < bytes.length; i++) { - hash = 31*hash + bytes.bytes[i]; - } - return hash; } + /** * Returns the shift value from a prefix encoded {@code long}. * @throws NumberFormatException if the supplied {@link BytesRef} is @@ -197,7 +215,7 @@ public final class NumericUtils { * This method can be used to decode a term's value. * @throws NumberFormatException if the supplied {@link BytesRef} is * not correctly prefix encoded. 
- * @see #longToPrefixCoded(long,int,BytesRef) + * @see #longToPrefixCodedBytes */ public static long prefixCodedToLong(final BytesRef val) { long sortableBits = 0L; @@ -221,7 +239,7 @@ public final class NumericUtils { * This method can be used to decode a term's value. * @throws NumberFormatException if the supplied {@link BytesRef} is * not correctly prefix encoded. - * @see #intToPrefixCoded(int,int,BytesRef) + * @see #intToPrefixCodedBytes */ public static int prefixCodedToInt(final BytesRef val) { int sortableBits = 0; @@ -402,8 +420,8 @@ public final class NumericUtils { */ public void addRange(final long min, final long max, final int shift) { final BytesRef minBytes = new BytesRef(BUF_SIZE_LONG), maxBytes = new BytesRef(BUF_SIZE_LONG); - longToPrefixCoded(min, shift, minBytes); - longToPrefixCoded(max, shift, maxBytes); + longToPrefixCodedBytes(min, shift, minBytes); + longToPrefixCodedBytes(max, shift, maxBytes); addRange(minBytes, maxBytes); } @@ -431,8 +449,8 @@ public final class NumericUtils { */ public void addRange(final int min, final int max, final int shift) { final BytesRef minBytes = new BytesRef(BUF_SIZE_INT), maxBytes = new BytesRef(BUF_SIZE_INT); - intToPrefixCoded(min, shift, minBytes); - intToPrefixCoded(max, shift, maxBytes); + intToPrefixCodedBytes(min, shift, minBytes); + intToPrefixCodedBytes(max, shift, maxBytes); addRange(minBytes, maxBytes); } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestNumericRangeQuery32.java b/lucene/core/src/test/org/apache/lucene/search/TestNumericRangeQuery32.java index 6711f7add70..4ce557524f3 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestNumericRangeQuery32.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestNumericRangeQuery32.java @@ -380,8 +380,8 @@ public class TestNumericRangeQuery32 extends LuceneTestCase { int a=lower; lower=upper; upper=a; } final BytesRef lowerBytes = new BytesRef(NumericUtils.BUF_SIZE_INT), upperBytes = new 
BytesRef(NumericUtils.BUF_SIZE_INT); - NumericUtils.intToPrefixCoded(lower, 0, lowerBytes); - NumericUtils.intToPrefixCoded(upper, 0, upperBytes); + NumericUtils.intToPrefixCodedBytes(lower, 0, lowerBytes); + NumericUtils.intToPrefixCodedBytes(upper, 0, upperBytes); // test inclusive range NumericRangeQuery tq=NumericRangeQuery.newIntRange(field, precisionStep, lower, upper, true, true); diff --git a/lucene/core/src/test/org/apache/lucene/search/TestNumericRangeQuery64.java b/lucene/core/src/test/org/apache/lucene/search/TestNumericRangeQuery64.java index ede30d9ed44..648c7c75516 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestNumericRangeQuery64.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestNumericRangeQuery64.java @@ -405,8 +405,8 @@ public class TestNumericRangeQuery64 extends LuceneTestCase { long a=lower; lower=upper; upper=a; } final BytesRef lowerBytes = new BytesRef(NumericUtils.BUF_SIZE_LONG), upperBytes = new BytesRef(NumericUtils.BUF_SIZE_LONG); - NumericUtils.longToPrefixCoded(lower, 0, lowerBytes); - NumericUtils.longToPrefixCoded(upper, 0, upperBytes); + NumericUtils.longToPrefixCodedBytes(lower, 0, lowerBytes); + NumericUtils.longToPrefixCodedBytes(upper, 0, upperBytes); // test inclusive range NumericRangeQuery tq=NumericRangeQuery.newLongRange(field, precisionStep, lower, upper, true, true); diff --git a/lucene/core/src/test/org/apache/lucene/util/TestNumericUtils.java b/lucene/core/src/test/org/apache/lucene/util/TestNumericUtils.java index 9153a1b0db9..125fd6d27a0 100644 --- a/lucene/core/src/test/org/apache/lucene/util/TestNumericUtils.java +++ b/lucene/core/src/test/org/apache/lucene/util/TestNumericUtils.java @@ -28,7 +28,7 @@ public class TestNumericUtils extends LuceneTestCase { // generate a series of encoded longs, each numerical one bigger than the one before BytesRef last=null, act=new BytesRef(NumericUtils.BUF_SIZE_LONG); for (long l=-100000L; l<100000L; l++) { - NumericUtils.longToPrefixCoded(l, 0, act); + 
NumericUtils.longToPrefixCodedBytes(l, 0, act); if (last!=null) { // test if smaller assertTrue("actual bigger than last (BytesRef)", last.compareTo(act) < 0 ); @@ -46,7 +46,7 @@ public class TestNumericUtils extends LuceneTestCase { // generate a series of encoded ints, each numerical one bigger than the one before BytesRef last=null, act=new BytesRef(NumericUtils.BUF_SIZE_INT); for (int i=-100000; i<100000; i++) { - NumericUtils.intToPrefixCoded(i, 0, act); + NumericUtils.intToPrefixCodedBytes(i, 0, act); if (last!=null) { // test if smaller assertTrue("actual bigger than last (BytesRef)", last.compareTo(act) < 0 ); @@ -69,7 +69,7 @@ public class TestNumericUtils extends LuceneTestCase { for (int i=0; i