From 5871ea797244fd395e428576b6698cee09a5e2f1 Mon Sep 17 00:00:00 2001 From: Uwe Schindler Date: Mon, 20 Sep 2021 19:12:38 +0200 Subject: [PATCH] LUCENE-10112: Improve LZ4 Compression performance with direct primitive read/writes (#310) Co-authored-by: Tim Brooks --- lucene/CHANGES.txt | 3 +++ .../src/java/org/apache/lucene/util/compress/LZ4.java | 9 +++++---- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index b337e98c9cb..b994e0d3ba7 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -276,6 +276,9 @@ Improvements file format in input/output classes like DataInput / DataOutput and codecs. (Uwe Schindler, Robert Muir) +* LUCENE-10112: Improve LZ4 Compression performance with direct primitive read/writes. + (Tim Brooks, Uwe Schindler, Robert Muir, Adrien Grand) + Bug fixes * LUCENE-10070 Skip deleted docs when accumulating facet counts for all docs. (Ankur Goel, Greg Miller) diff --git a/lucene/core/src/java/org/apache/lucene/util/compress/LZ4.java b/lucene/core/src/java/org/apache/lucene/util/compress/LZ4.java index e7ed674d22c..8e47bcf5edc 100644 --- a/lucene/core/src/java/org/apache/lucene/util/compress/LZ4.java +++ b/lucene/core/src/java/org/apache/lucene/util/compress/LZ4.java @@ -31,6 +31,7 @@ import java.util.Arrays; import java.util.Objects; import org.apache.lucene.store.DataInput; import org.apache.lucene.store.DataOutput; +import org.apache.lucene.util.BitUtil; import org.apache.lucene.util.packed.PackedInts; /** @@ -63,10 +64,10 @@ public final class LZ4 { } private static int readInt(byte[] buf, int i) { - return ((buf[i] & 0xFF) << 24) - | ((buf[i + 1] & 0xFF) << 16) - | ((buf[i + 2] & 0xFF) << 8) - | (buf[i + 3] & 0xFF); + // we hardcode LITTLE ENDIAN here as this is most performant on most platforms. + // According to LZ4's algorithm the endianness does not matter at all, but we + // want to prevent indexes from differing just because of platform endianness! 
+ return (int) BitUtil.VH_LE_INT.get(buf, i); } private static int commonBytes(byte[] b, int o1, int o2, int limit) {