LUCENE-9877: Allow up to 7 exceptions in PForUtil (instead of 3) (#48)

Co-authored-by: Greg Miller <gmiller@amazon.com>
This commit is contained in:
Greg Miller 2021-03-30 06:11:33 -07:00 committed by GitHub
parent 39b8e97613
commit fd79f9737a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 22 additions and 20 deletions

View File

@@ -205,6 +205,8 @@ Improvements
* LUCENE-9827: Speed up merging of stored fields and term vectors for smaller segments. * LUCENE-9827: Speed up merging of stored fields and term vectors for smaller segments.
(Daniel Mitterdorfer, Dimitrios Liapis, Adrien Grand, Robert Muir) (Daniel Mitterdorfer, Dimitrios Liapis, Adrien Grand, Robert Muir)
* LUCENE-9877: Reduce index size by increasing allowable exceptions in PForUtil from 3 to 7. (Greg Miller)
Bug fixes Bug fixes

View File

@@ -42,24 +42,24 @@ public final class PForUtil {
/** Encode 128 integers from {@code longs} into {@code out}. */ /** Encode 128 integers from {@code longs} into {@code out}. */
void encode(long[] longs, DataOutput out) throws IOException { void encode(long[] longs, DataOutput out) throws IOException {
// At most 3 exceptions // At most 7 exceptions
final long[] top4 = new long[4]; final long[] top8 = new long[8];
Arrays.fill(top4, -1L); Arrays.fill(top8, -1L);
for (int i = 0; i < ForUtil.BLOCK_SIZE; ++i) { for (int i = 0; i < ForUtil.BLOCK_SIZE; ++i) {
if (longs[i] > top4[0]) { if (longs[i] > top8[0]) {
top4[0] = longs[i]; top8[0] = longs[i];
Arrays.sort( Arrays.sort(
top4); // For only 4 entries we just sort on every iteration instead of maintaining a PQ top8); // For only 8 entries we just sort on every iteration instead of maintaining a PQ
} }
} }
final int maxBitsRequired = PackedInts.bitsRequired(top4[3]); final int maxBitsRequired = PackedInts.bitsRequired(top8[7]);
// We store the patch on a byte, so we can't decrease the number of bits required by more than 8 // We store the patch on a byte, so we can't decrease the number of bits required by more than 8
final int patchedBitsRequired = Math.max(PackedInts.bitsRequired(top4[0]), maxBitsRequired - 8); final int patchedBitsRequired = Math.max(PackedInts.bitsRequired(top8[0]), maxBitsRequired - 8);
int numExceptions = 0; int numExceptions = 0;
final long maxUnpatchedValue = (1L << patchedBitsRequired) - 1; final long maxUnpatchedValue = (1L << patchedBitsRequired) - 1;
for (int i = 1; i < 4; ++i) { for (int i = 1; i < 8; ++i) {
if (top4[i] > maxUnpatchedValue) { if (top8[i] > maxUnpatchedValue) {
numExceptions++; numExceptions++;
} }
} }

View File

@@ -43,24 +43,24 @@ final class PForUtil {
/** Encode 128 integers from {@code longs} into {@code out}. */ /** Encode 128 integers from {@code longs} into {@code out}. */
void encode(long[] longs, DataOutput out) throws IOException { void encode(long[] longs, DataOutput out) throws IOException {
// At most 3 exceptions // At most 7 exceptions
final long[] top4 = new long[4]; final long[] top8 = new long[8];
Arrays.fill(top4, -1L); Arrays.fill(top8, -1L);
for (int i = 0; i < ForUtil.BLOCK_SIZE; ++i) { for (int i = 0; i < ForUtil.BLOCK_SIZE; ++i) {
if (longs[i] > top4[0]) { if (longs[i] > top8[0]) {
top4[0] = longs[i]; top8[0] = longs[i];
Arrays.sort( Arrays.sort(
top4); // For only 4 entries we just sort on every iteration instead of maintaining a PQ top8); // For only 8 entries we just sort on every iteration instead of maintaining a PQ
} }
} }
final int maxBitsRequired = PackedInts.bitsRequired(top4[3]); final int maxBitsRequired = PackedInts.bitsRequired(top8[7]);
// We store the patch on a byte, so we can't decrease the number of bits required by more than 8 // We store the patch on a byte, so we can't decrease the number of bits required by more than 8
final int patchedBitsRequired = Math.max(PackedInts.bitsRequired(top4[0]), maxBitsRequired - 8); final int patchedBitsRequired = Math.max(PackedInts.bitsRequired(top8[0]), maxBitsRequired - 8);
int numExceptions = 0; int numExceptions = 0;
final long maxUnpatchedValue = (1L << patchedBitsRequired) - 1; final long maxUnpatchedValue = (1L << patchedBitsRequired) - 1;
for (int i = 1; i < 4; ++i) { for (int i = 1; i < 8; ++i) {
if (top4[i] > maxUnpatchedValue) { if (top8[i] > maxUnpatchedValue) {
numExceptions++; numExceptions++;
} }
} }