From 533fd2eebab5e3f37cb6cad4f020860aa2bcec41 Mon Sep 17 00:00:00 2001
From: Yonik Seeley <yonik@apache.org>
Date: Thu, 24 Jan 2013 22:41:59 +0000
Subject: [PATCH] LUCENE-4690: Performance improvements and non-hashing
 versions of NumericUtils.*ToPrefixCoded

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1438242 13f79535-47bb-0310-9956-ffa450edef68
---
 lucene/CHANGES.txt                            |  3 +
 .../org/apache/lucene/util/NumericUtils.java  | 78 ++++++++++++-------
 .../search/TestNumericRangeQuery32.java       |  4 +-
 .../search/TestNumericRangeQuery64.java       |  4 +-
 .../apache/lucene/util/TestNumericUtils.java  | 12 +--
 .../org/apache/solr/schema/TrieField.java     | 29 ++++---
 6 files changed, 74 insertions(+), 56 deletions(-)
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 34d8bd85975..154795cf1c1 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -58,6 +58,9 @@ Optimizations
 * LUCENE-3298: FST can now be larger than 2.1 GB / 2.1 B nodes.
   (James Dyer, Mike McCandless)
 
+* LUCENE-4690: Performance improvements and non-hashing versions
+  of NumericUtils.*ToPrefixCoded() (yonik)
+
 New Features
 
 * LUCENE-4686: New specialized DGapVInt8IntEncoder for facets (now the 
diff --git a/lucene/core/src/java/org/apache/lucene/util/NumericUtils.java b/lucene/core/src/java/org/apache/lucene/util/NumericUtils.java
index 0815ecb73de..f4fcc632339 100644
--- a/lucene/core/src/java/org/apache/lucene/util/NumericUtils.java
+++ b/lucene/core/src/java/org/apache/lucene/util/NumericUtils.java
@@ -82,7 +82,7 @@ public final class NumericUtils {
   /**
    * The maximum term length (used for <code>byte[]</code> buffer size)
    * for encoding <code>long</code> values.
-   * @see #longToPrefixCoded(long,int,BytesRef)
+   * @see #longToPrefixCodedBytes
    */
   public static final int BUF_SIZE_LONG = 63/7 + 2;
 
@@ -95,7 +95,7 @@ public final class NumericUtils {
   /**
    * The maximum term length (used for <code>byte[]</code> buffer size)
    * for encoding <code>int</code> values.
-   * @see #intToPrefixCoded(int,int,BytesRef)
+   * @see #intToPrefixCodedBytes
    */
   public static final int BUF_SIZE_INT = 31/7 + 2;
 
@@ -109,15 +109,42 @@ public final class NumericUtils {
    * @return the hash code for indexing (TermsHash)
    */
   public static int longToPrefixCoded(final long val, final int shift, final BytesRef bytes) {
-    if (shift>63 || shift<0)
+    longToPrefixCodedBytes(val, shift, bytes);
+    return bytes.hashCode();
+  }
+
+  /**
+   * Returns prefix coded bits after reducing the precision by <code>shift</code> bits.
+   * This is method is used by {@link NumericTokenStream}.
+   * After encoding, {@code bytes.offset} will always be 0.
+   * @param val the numeric value
+   * @param shift how many bits to strip from the right
+   * @param bytes will contain the encoded value
+   * @return the hash code for indexing (TermsHash)
+   */
+  public static int intToPrefixCoded(final int val, final int shift, final BytesRef bytes) {
+    intToPrefixCodedBytes(val, shift, bytes);
+    return bytes.hashCode();
+  }
+
+  /**
+   * Returns prefix coded bits after reducing the precision by <code>shift</code> bits.
+   * This is method is used by {@link NumericTokenStream}.
+   * After encoding, {@code bytes.offset} will always be 0.
+   * @param val the numeric value
+   * @param shift how many bits to strip from the right
+   * @param bytes will contain the encoded value
+   */
+  public static void longToPrefixCodedBytes(final long val, final int shift, final BytesRef bytes) {
+    if ((shift & ~0x3f) != 0)  // ensure shift is 0..63
       throw new IllegalArgumentException("Illegal shift value, must be 0..63");
-    int hash, nChars = (63-shift)/7 + 1;
+    int nChars = (((63-shift)*37)>>8) + 1;    // i/7 is the same as (i*37)>>8 for i in 0..63
     bytes.offset = 0;
-    bytes.length = nChars+1;
+    bytes.length = nChars+1;   // one extra for the byte that contains the shift info
     if (bytes.bytes.length < bytes.length) {
-      bytes.grow(NumericUtils.BUF_SIZE_LONG);
+      bytes.bytes = new byte[NumericUtils.BUF_SIZE_LONG];  // use the max
     }
-    bytes.bytes[0] = (byte) (hash = (SHIFT_START_LONG + shift));
+    bytes.bytes[0] = (byte)(SHIFT_START_LONG + shift);
     long sortableBits = val ^ 0x8000000000000000L;
     sortableBits >>>= shift;
     while (nChars > 0) {
@@ -126,13 +153,9 @@ public final class NumericUtils {
       bytes.bytes[nChars--] = (byte)(sortableBits & 0x7f);
       sortableBits >>>= 7;
     }
-    // calculate hash
-    for (int i = 1; i < bytes.length; i++) {
-      hash = 31*hash + bytes.bytes[i];
-    }
-    return hash;
   }
 
+
   /**
    * Returns prefix coded bits after reducing the precision by <code>shift</code> bits.
    * This is method is used by {@link NumericTokenStream}.
@@ -140,18 +163,17 @@ public final class NumericUtils {
    * @param val the numeric value
    * @param shift how many bits to strip from the right
    * @param bytes will contain the encoded value
-   * @return the hash code for indexing (TermsHash)
    */
-  public static int intToPrefixCoded(final int val, final int shift, final BytesRef bytes) {
-    if (shift>31 || shift<0)
+  public static void intToPrefixCodedBytes(final int val, final int shift, final BytesRef bytes) {
+    if ((shift & ~0x1f) != 0)  // ensure shift is 0..31
       throw new IllegalArgumentException("Illegal shift value, must be 0..31");
-    int hash, nChars = (31-shift)/7 + 1;
+    int nChars = (((31-shift)*37)>>8) + 1;    // i/7 is the same as (i*37)>>8 for i in 0..63
     bytes.offset = 0;
-    bytes.length = nChars+1;
+    bytes.length = nChars+1;   // one extra for the byte that contains the shift info
     if (bytes.bytes.length < bytes.length) {
-      bytes.grow(NumericUtils.BUF_SIZE_INT);
+      bytes.bytes = new byte[NumericUtils.BUF_SIZE_LONG];  // use the max
     }
-    bytes.bytes[0] = (byte) (hash = (SHIFT_START_INT + shift));
+    bytes.bytes[0] = (byte)(SHIFT_START_INT + shift);
     int sortableBits = val ^ 0x80000000;
     sortableBits >>>= shift;
     while (nChars > 0) {
@@ -160,13 +182,9 @@ public final class NumericUtils {
       bytes.bytes[nChars--] = (byte)(sortableBits & 0x7f);
       sortableBits >>>= 7;
     }
-    // calculate hash
-    for (int i = 1; i < bytes.length; i++) {
-      hash = 31*hash + bytes.bytes[i];
-    }
-    return hash;
   }
 
+
   /**
    * Returns the shift value from a prefix encoded {@code long}.
    * @throws NumberFormatException if the supplied {@link BytesRef} is
@@ -197,7 +215,7 @@ public final class NumericUtils {
    * This method can be used to decode a term's value.
    * @throws NumberFormatException if the supplied {@link BytesRef} is
    * not correctly prefix encoded.
-   * @see #longToPrefixCoded(long,int,BytesRef)
+   * @see #longToPrefixCodedBytes
    */
   public static long prefixCodedToLong(final BytesRef val) {
     long sortableBits = 0L;
@@ -221,7 +239,7 @@ public final class NumericUtils {
    * This method can be used to decode a term's value.
    * @throws NumberFormatException if the supplied {@link BytesRef} is
    * not correctly prefix encoded.
-   * @see #intToPrefixCoded(int,int,BytesRef)
+   * @see #intToPrefixCodedBytes
    */
   public static int prefixCodedToInt(final BytesRef val) {
     int sortableBits = 0;
@@ -402,8 +420,8 @@ public final class NumericUtils {
      */
     public void addRange(final long min, final long max, final int shift) {
       final BytesRef minBytes = new BytesRef(BUF_SIZE_LONG), maxBytes = new BytesRef(BUF_SIZE_LONG);
-      longToPrefixCoded(min, shift, minBytes);
-      longToPrefixCoded(max, shift, maxBytes);
+      longToPrefixCodedBytes(min, shift, minBytes);
+      longToPrefixCodedBytes(max, shift, maxBytes);
       addRange(minBytes, maxBytes);
     }
   
@@ -431,8 +449,8 @@ public final class NumericUtils {
      */
     public void addRange(final int min, final int max, final int shift) {
       final BytesRef minBytes = new BytesRef(BUF_SIZE_INT), maxBytes = new BytesRef(BUF_SIZE_INT);
-      intToPrefixCoded(min, shift, minBytes);
-      intToPrefixCoded(max, shift, maxBytes);
+      intToPrefixCodedBytes(min, shift, minBytes);
+      intToPrefixCodedBytes(max, shift, maxBytes);
       addRange(minBytes, maxBytes);
     }
   
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestNumericRangeQuery32.java b/lucene/core/src/test/org/apache/lucene/search/TestNumericRangeQuery32.java
index 6711f7add70..4ce557524f3 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestNumericRangeQuery32.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestNumericRangeQuery32.java
@@ -380,8 +380,8 @@ public class TestNumericRangeQuery32 extends LuceneTestCase {
         int a=lower; lower=upper; upper=a;
       }
       final BytesRef lowerBytes = new BytesRef(NumericUtils.BUF_SIZE_INT), upperBytes = new BytesRef(NumericUtils.BUF_SIZE_INT);
-      NumericUtils.intToPrefixCoded(lower, 0, lowerBytes);
-      NumericUtils.intToPrefixCoded(upper, 0, upperBytes);
+      NumericUtils.intToPrefixCodedBytes(lower, 0, lowerBytes);
+      NumericUtils.intToPrefixCodedBytes(upper, 0, upperBytes);
 
       // test inclusive range
       NumericRangeQuery<Integer> tq=NumericRangeQuery.newIntRange(field, precisionStep, lower, upper, true, true);
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestNumericRangeQuery64.java b/lucene/core/src/test/org/apache/lucene/search/TestNumericRangeQuery64.java
index ede30d9ed44..648c7c75516 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestNumericRangeQuery64.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestNumericRangeQuery64.java
@@ -405,8 +405,8 @@ public class TestNumericRangeQuery64 extends LuceneTestCase {
         long a=lower; lower=upper; upper=a;
       }
       final BytesRef lowerBytes = new BytesRef(NumericUtils.BUF_SIZE_LONG), upperBytes = new BytesRef(NumericUtils.BUF_SIZE_LONG);
-      NumericUtils.longToPrefixCoded(lower, 0, lowerBytes);
-      NumericUtils.longToPrefixCoded(upper, 0, upperBytes);
+      NumericUtils.longToPrefixCodedBytes(lower, 0, lowerBytes);
+      NumericUtils.longToPrefixCodedBytes(upper, 0, upperBytes);
       
       // test inclusive range
       NumericRangeQuery<Long> tq=NumericRangeQuery.newLongRange(field, precisionStep, lower, upper, true, true);
diff --git a/lucene/core/src/test/org/apache/lucene/util/TestNumericUtils.java b/lucene/core/src/test/org/apache/lucene/util/TestNumericUtils.java
index 9153a1b0db9..125fd6d27a0 100644
--- a/lucene/core/src/test/org/apache/lucene/util/TestNumericUtils.java
+++ b/lucene/core/src/test/org/apache/lucene/util/TestNumericUtils.java
@@ -28,7 +28,7 @@ public class TestNumericUtils extends LuceneTestCase {
     // generate a series of encoded longs, each numerical one bigger than the one before
     BytesRef last=null, act=new BytesRef(NumericUtils.BUF_SIZE_LONG);
     for (long l=-100000L; l<100000L; l++) {
-      NumericUtils.longToPrefixCoded(l, 0, act);
+      NumericUtils.longToPrefixCodedBytes(l, 0, act);
       if (last!=null) {
         // test if smaller
         assertTrue("actual bigger than last (BytesRef)", last.compareTo(act) < 0 );
@@ -46,7 +46,7 @@ public class TestNumericUtils extends LuceneTestCase {
     // generate a series of encoded ints, each numerical one bigger than the one before
     BytesRef last=null, act=new BytesRef(NumericUtils.BUF_SIZE_INT);
     for (int i=-100000; i<100000; i++) {
-      NumericUtils.intToPrefixCoded(i, 0, act);
+      NumericUtils.intToPrefixCodedBytes(i, 0, act);
       if (last!=null) {
         // test if smaller
         assertTrue("actual bigger than last (BytesRef)", last.compareTo(act) < 0 );
@@ -69,7 +69,7 @@ public class TestNumericUtils extends LuceneTestCase {
     
     for (int i=0; i<vals.length; i++) {
       prefixVals[i] = new BytesRef(NumericUtils.BUF_SIZE_LONG);
-      NumericUtils.longToPrefixCoded(vals[i], 0, prefixVals[i]);
+      NumericUtils.longToPrefixCodedBytes(vals[i], 0, prefixVals[i]);
       
       // check forward and back conversion
       assertEquals( "forward and back conversion should generate same long", vals[i], NumericUtils.prefixCodedToLong(prefixVals[i]) );
@@ -92,7 +92,7 @@ public class TestNumericUtils extends LuceneTestCase {
     final BytesRef ref = new BytesRef(NumericUtils.BUF_SIZE_LONG);
     for (int i=0; i<vals.length; i++) {
       for (int j=0; j<64; j++) {
-        NumericUtils.longToPrefixCoded(vals[i], j, ref);
+        NumericUtils.longToPrefixCodedBytes(vals[i], j, ref);
         long prefixVal=NumericUtils.prefixCodedToLong(ref);
         long mask=(1L << j) - 1L;
         assertEquals( "difference between prefix val and original value for "+vals[i]+" with shift="+j, vals[i] & mask, vals[i]-prefixVal );
@@ -109,7 +109,7 @@ public class TestNumericUtils extends LuceneTestCase {
     
     for (int i=0; i<vals.length; i++) {
       prefixVals[i] = new BytesRef(NumericUtils.BUF_SIZE_INT);
-      NumericUtils.intToPrefixCoded(vals[i], 0, prefixVals[i]);
+      NumericUtils.intToPrefixCodedBytes(vals[i], 0, prefixVals[i]);
       
       // check forward and back conversion
       assertEquals( "forward and back conversion should generate same int", vals[i], NumericUtils.prefixCodedToInt(prefixVals[i]) );
@@ -132,7 +132,7 @@ public class TestNumericUtils extends LuceneTestCase {
     final BytesRef ref = new BytesRef(NumericUtils.BUF_SIZE_LONG);
     for (int i=0; i<vals.length; i++) {
       for (int j=0; j<32; j++) {
-        NumericUtils.intToPrefixCoded(vals[i], j, ref);
+        NumericUtils.intToPrefixCodedBytes(vals[i], j, ref);
         int prefixVal=NumericUtils.prefixCodedToInt(ref);
         int mask=(1 << j) - 1;
         assertEquals( "difference between prefix val and original value for "+vals[i]+" with shift="+j, vals[i] & mask, vals[i]-prefixVal );
diff --git a/solr/core/src/java/org/apache/solr/schema/TrieField.java b/solr/core/src/java/org/apache/solr/schema/TrieField.java
index 77d29b71cfc..afe4da286b0 100644
--- a/solr/core/src/java/org/apache/solr/schema/TrieField.java
+++ b/solr/core/src/java/org/apache/solr/schema/TrieField.java
@@ -29,8 +29,6 @@ import org.apache.lucene.document.FieldType.NumericType;
 import org.apache.lucene.document.FloatField;
 import org.apache.lucene.document.IntField;
 import org.apache.lucene.document.LongField;
-import org.apache.lucene.index.GeneralField;
-import org.apache.lucene.index.IndexableField;
 import org.apache.lucene.index.StorableField;
 import org.apache.lucene.queries.function.ValueSource;
 import org.apache.lucene.queries.function.valuesource.DoubleFieldSource;
@@ -48,7 +46,6 @@ import org.apache.solr.analysis.TrieTokenizerFactory;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.response.TextResponseWriter;
 import org.apache.solr.search.QParser;
-import org.apache.solr.search.function.*;
 
 /**
  * Provides field types to support for Lucene's {@link
@@ -311,19 +308,19 @@ public class TrieField extends PrimitiveFieldType {
     String s = val.toString();
     switch (type) {
       case INTEGER:
-        NumericUtils.intToPrefixCoded(Integer.parseInt(s), 0, result);
+        NumericUtils.intToPrefixCodedBytes(Integer.parseInt(s), 0, result);
         break;
       case FLOAT:
-        NumericUtils.intToPrefixCoded(NumericUtils.floatToSortableInt(Float.parseFloat(s)), 0, result);
+        NumericUtils.intToPrefixCodedBytes(NumericUtils.floatToSortableInt(Float.parseFloat(s)), 0, result);
         break;
       case LONG:
-        NumericUtils.longToPrefixCoded(Long.parseLong(s), 0, result);
+        NumericUtils.longToPrefixCodedBytes(Long.parseLong(s), 0, result);
         break;
       case DOUBLE:
-        NumericUtils.longToPrefixCoded(NumericUtils.doubleToSortableLong(Double.parseDouble(s)), 0, result);
+        NumericUtils.longToPrefixCodedBytes(NumericUtils.doubleToSortableLong(Double.parseDouble(s)), 0, result);
         break;
       case DATE:
-        NumericUtils.longToPrefixCoded(dateField.parseMath(null, s).getTime(), 0, result);
+        NumericUtils.longToPrefixCodedBytes(dateField.parseMath(null, s).getTime(), 0, result);
         break;
       default:
         throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown type for trie field: " + type);
@@ -419,17 +416,17 @@ public class TrieField extends PrimitiveFieldType {
     if (val != null) {
       switch (type) {
         case INTEGER:
-          NumericUtils.intToPrefixCoded(val.intValue(), 0, bytes);
+          NumericUtils.intToPrefixCodedBytes(val.intValue(), 0, bytes);
           break;
         case FLOAT:
-          NumericUtils.intToPrefixCoded(NumericUtils.floatToSortableInt(val.floatValue()), 0, bytes);
+          NumericUtils.intToPrefixCodedBytes(NumericUtils.floatToSortableInt(val.floatValue()), 0, bytes);
           break;
         case LONG: //fallthrough!
         case DATE:
-          NumericUtils.longToPrefixCoded(val.longValue(), 0, bytes);
+          NumericUtils.longToPrefixCodedBytes(val.longValue(), 0, bytes);
           break;
         case DOUBLE:
-          NumericUtils.longToPrefixCoded(NumericUtils.doubleToSortableLong(val.doubleValue()), 0, bytes);
+          NumericUtils.longToPrefixCodedBytes(NumericUtils.doubleToSortableLong(val.doubleValue()), 0, bytes);
           break;
         default:
           throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown type for trie field: " + f.name());
@@ -441,7 +438,7 @@ public class TrieField extends PrimitiveFieldType {
         throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Invalid field contents: "+f.name());
       switch (type) {
         case INTEGER:
-          NumericUtils.intToPrefixCoded(toInt(bytesRef.bytes, bytesRef.offset), 0, bytes);
+          NumericUtils.intToPrefixCodedBytes(toInt(bytesRef.bytes, bytesRef.offset), 0, bytes);
           break;
         case FLOAT: {
           // WARNING: Code Duplication! Keep in sync with o.a.l.util.NumericUtils!
@@ -449,12 +446,12 @@ public class TrieField extends PrimitiveFieldType {
           // code in next 2 lines is identical to: int v = NumericUtils.floatToSortableInt(Float.intBitsToFloat(toInt(arr)));
           int v = toInt(bytesRef.bytes, bytesRef.offset);
           if (v<0) v ^= 0x7fffffff;
-          NumericUtils.intToPrefixCoded(v, 0, bytes);
+          NumericUtils.intToPrefixCodedBytes(v, 0, bytes);
           break;
         }
         case LONG: //fallthrough!
         case DATE:
-          NumericUtils.longToPrefixCoded(toLong(bytesRef.bytes, bytesRef.offset), 0, bytes);
+          NumericUtils.longToPrefixCodedBytes(toLong(bytesRef.bytes, bytesRef.offset), 0, bytes);
           break;
         case DOUBLE: {
           // WARNING: Code Duplication! Keep in sync with o.a.l.util.NumericUtils!
@@ -462,7 +459,7 @@ public class TrieField extends PrimitiveFieldType {
           // code in next 2 lines is identical to: long v = NumericUtils.doubleToSortableLong(Double.longBitsToDouble(toLong(arr)));
           long v = toLong(bytesRef.bytes, bytesRef.offset);
           if (v<0) v ^= 0x7fffffffffffffffL;
-          NumericUtils.longToPrefixCoded(v, 0, bytes);
+          NumericUtils.longToPrefixCodedBytes(v, 0, bytes);
           break;
         }
         default: