#! /usr/bin/env python # Licensed to the Apache Software Foundation (ASF) under one or more # contributor license agreements. See the NOTICE file distributed with # this work for additional information regarding copyright ownership. # The ASF licenses this file to You under the Apache License, Version 2.0 # (the "License"); you may not use this file except in compliance with # the License. You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. try: # python 3.9+ from math import gcd except ImportError: # old python from fractions import gcd """Code generation for bulk operations""" MAX_SPECIALIZED_BITS_PER_VALUE = 24; PACKED_64_SINGLE_BLOCK_BPV = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 16, 21, 32] OUTPUT_FILE = "BulkOperation.java" HEADER = """// This file has been automatically generated, DO NOT EDIT /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.lucene.util.packed; """ FOOTER = """ protected int writeLong(long block, byte[] blocks, int blocksOffset) { for (int j = 1; j <= 8; ++j) { blocks[blocksOffset++] = (byte) (block >>> (64 - (j << 3))); } return blocksOffset; } /** * For every number of bits per value, there is a minimum number of * blocks (b) / values (v) you need to write in order to reach the next block * boundary: *

   *  - 16 bits per value -> b=2, v=1
   *  - 24 bits per value -> b=3, v=1
   *  - 50 bits per value -> b=25, v=4
   *  - 63 bits per value -> b=63, v=8
   *  - ...
   *

* * A bulk read consists in copying iterations*v values that are * contained in iterations*b blocks into a long[] * (higher values of iterations are likely to yield a better * throughput): this requires n * (b + 8v) bytes of memory. * * This method computes iterations as * ramBudget / (b + 8v) (since a long is 8 bytes). */ public final int computeIterations(int valueCount, int ramBudget) { final int iterations = ramBudget / (byteBlockCount() + 8 * byteValueCount()); if (iterations == 0) { // at least 1 return 1; } else if ((iterations - 1) * byteValueCount() >= valueCount) { // don't allocate for more than the size of the reader return (int) Math.ceil((double) valueCount / byteValueCount()); } else { return iterations; } } } """ def is_power_of_two(n): return n & (n - 1) == 0 def casts(typ): cast_start = "(%s) (" % typ cast_end = ")" if typ == "long": cast_start = "" cast_end = "" return cast_start, cast_end def hexNoLSuffix(n): # On 32 bit Python values > (1 << 31)-1 will have L appended by hex function: s = hex(n) if s.endswith('L'): s = s[:-1] return s def masks(bits): if bits == 64: return "", "" return "(", " & %sL)" % (hexNoLSuffix((1 << bits) - 1)) def get_type(bits): if bits == 8: return "byte" elif bits == 16: return "short" elif bits == 32: return "int" elif bits == 64: return "long" else: assert False def block_value_count(bpv, bits=64): blocks = bpv values = blocks * bits // bpv while blocks % 2 == 0 and values % 2 == 0: blocks //= 2 values //= 2 assert values * bpv == bits * blocks, "%d values, %d blocks, %d bits per value" % (values, blocks, bpv) return (blocks, values) def packed64(bpv, f): mask = (1 << bpv) - 1 f.write("\n") f.write(" public BulkOperationPacked%d() {\n" % bpv) f.write(" super(%d);\n" % bpv) f.write(" }\n\n") if bpv == 64: f.write(""" @Override public void decode(long[] blocks, int blocksOffset, long[] values, int valuesOffset, int iterations) { System.arraycopy(blocks, blocksOffset, values, valuesOffset, valueCount() * iterations); } @Override public void decode(long[] blocks, int blocksOffset, int[] values, int valuesOffset, int iterations) { throw new UnsupportedOperationException(); } @Override public void decode(byte[] blocks, int blocksOffset, int[] values, int valuesOffset, int iterations) { throw new UnsupportedOperationException(); } @Override public void decode(byte[] blocks, int blocksOffset, long[] values, int valuesOffset, int iterations) { LongBuffer.wrap(values, valuesOffset, iterations * valueCount()).put(ByteBuffer.wrap(blocks, blocksOffset, 8 * iterations * blockCount()).asLongBuffer()); } """) else: p64_decode(bpv, f, 32) p64_decode(bpv, f, 64) def p64_decode(bpv, f, bits): blocks, values = block_value_count(bpv) typ = get_type(bits) cast_start, cast_end = casts(typ) f.write(" @Override\n") f.write(" public void decode(long[] blocks, int blocksOffset, %s[] values, int valuesOffset, int iterations) {\n" % typ) if bits < bpv: f.write(" throw new UnsupportedOperationException();\n") else: f.write(" for (int i = 0; i < iterations; ++i) {\n") mask = (1 << bpv) - 1 if is_power_of_two(bpv): f.write(" final long block = blocks[blocksOffset++];\n") f.write(" for (int shift = %d; shift >= 0; shift -= %d) {\n" % (64 - bpv, bpv)) f.write(" values[valuesOffset++] = %s(block >>> shift) & %d%s;\n" % (cast_start, mask, cast_end)) f.write(" }\n") else: for i in range(0, values): block_offset = i * bpv // 64 bit_offset = (i * bpv) % 64 if bit_offset == 0: # start of block f.write(" final long block%d = blocks[blocksOffset++];\n" % block_offset); f.write(" values[valuesOffset++] = %sblock%d >>> %d%s;\n" % (cast_start, block_offset, 64 - bpv, cast_end)) elif bit_offset + bpv == 64: # end of block f.write(" values[valuesOffset++] = %sblock%d & %dL%s;\n" % (cast_start, block_offset, mask, cast_end)) elif bit_offset + bpv < 64: # middle of block f.write(" values[valuesOffset++] = %s(block%d >>> %d) & %dL%s;\n" % (cast_start, block_offset, 64 - bit_offset - bpv, mask, cast_end)) else: # value spans across 2 blocks mask1 = (1 << (64 - bit_offset)) - 1 shift1 = bit_offset + bpv - 64 shift2 = 64 - shift1 f.write(" final long block%d = blocks[blocksOffset++];\n" % (block_offset + 1)); f.write(" values[valuesOffset++] = %s((block%d & %dL) << %d) | (block%d >>> %d)%s;\n" % (cast_start, block_offset, mask1, shift1, block_offset + 1, shift2, cast_end)) f.write(" }\n") f.write(" }\n\n") byte_blocks, byte_values = block_value_count(bpv, 8) f.write(" @Override\n") f.write(" public void decode(byte[] blocks, int blocksOffset, %s[] values, int valuesOffset, int iterations) {\n" % typ) if bits < bpv: f.write(" throw new UnsupportedOperationException();\n") else: if is_power_of_two(bpv) and bpv < 8: f.write(" for (int j = 0; j < iterations; ++j) {\n") f.write(" final byte block = blocks[blocksOffset++];\n") for shift in range(8 - bpv, 0, -bpv): f.write(" values[valuesOffset++] = (block >>> %d) & %d;\n" % (shift, mask)) f.write(" values[valuesOffset++] = block & %d;\n" % mask) f.write(" }\n") elif bpv == 8: f.write(" for (int j = 0; j < iterations; ++j) {\n") f.write(" values[valuesOffset++] = blocks[blocksOffset++] & 0xFF;\n") f.write(" }\n") elif is_power_of_two(bpv) and bpv > 8: f.write(" for (int j = 0; j < iterations; ++j) {\n") m = bits <= 32 and "0xFF" or "0xFFL" f.write(" values[valuesOffset++] =") for i in range(bpv // 8 - 1): f.write(" ((blocks[blocksOffset++] & %s) << %d) |" % (m, bpv - 8)) f.write(" (blocks[blocksOffset++] & %s);\n" % m) f.write(" }\n") else: f.write(" for (int i = 0; i < iterations; ++i) {\n") for i in range(0, byte_values): byte_start = i * bpv // 8 bit_start = (i * bpv) % 8 byte_end = ((i + 1) * bpv - 1) // 8 bit_end = ((i + 1) * bpv - 1) % 8 shift = lambda b: 8 * (byte_end - b - 1) + 1 + bit_end if bit_start == 0: f.write(" final %s byte%d = blocks[blocksOffset++] & 0xFF;\n" % (typ, byte_start)) for b in range(byte_start + 1, byte_end + 1): f.write(" final %s byte%d = blocks[blocksOffset++] & 0xFF;\n" % (typ, b)) f.write(" values[valuesOffset++] =") if byte_start == byte_end: if bit_start == 0: if bit_end == 7: f.write(" byte%d" % byte_start) else: f.write(" byte%d >>> %d" % (byte_start, 7 - bit_end)) else: if bit_end == 7: f.write(" byte%d & %d" % (byte_start, 2 ** (8 - bit_start) - 1)) else: f.write(" (byte%d >>> %d) & %d" % (byte_start, 7 - bit_end, 2 ** (bit_end - bit_start + 1) - 1)) else: if bit_start == 0: f.write(" (byte%d << %d)" % (byte_start, shift(byte_start))) else: f.write(" ((byte%d & %d) << %d)" % (byte_start, 2 ** (8 - bit_start) - 1, shift(byte_start))) for b in range(byte_start + 1, byte_end): f.write(" | (byte%d << %d)" % (b, shift(b))) if bit_end == 7: f.write(" | byte%d" % byte_end) else: f.write(" | (byte%d >>> %d)" % (byte_end, 7 - bit_end)) f.write(";\n") f.write(" }\n") f.write(" }\n\n") if __name__ == '__main__': f = open(OUTPUT_FILE, 'w') f.write(HEADER) f.write('\n') f.write('''/** * Efficient sequential read/write of packed integers. */\n''') f.write('abstract class BulkOperation implements PackedInts.Decoder, PackedInts.Encoder {\n') f.write(' private static final BulkOperation[] packedBulkOps = new BulkOperation[] {\n') for bpv in range(1, 65): if bpv > MAX_SPECIALIZED_BITS_PER_VALUE: f.write(' new BulkOperationPacked(%d),\n' % bpv) continue f2 = open('BulkOperationPacked%d.java' % bpv, 'w') f2.write(HEADER) if bpv == 64: f2.write('import java.nio.LongBuffer;\n') f2.write('import java.nio.ByteBuffer;\n') f2.write('\n') f2.write('''/** * Efficient sequential read/write of packed integers. */\n''') f2.write('final class BulkOperationPacked%d extends BulkOperationPacked {\n' % bpv) packed64(bpv, f2) f2.write('}\n') f2.close() f.write(' new BulkOperationPacked%d(),\n' % bpv) f.write(' };\n') f.write('\n') f.write(' // NOTE: this is sparse (some entries are null):\n') f.write(' private static final BulkOperation[] packedSingleBlockBulkOps = new BulkOperation[] {\n') for bpv in range(1, max(PACKED_64_SINGLE_BLOCK_BPV) + 1): if bpv in PACKED_64_SINGLE_BLOCK_BPV: f.write(' new BulkOperationPackedSingleBlock(%d),\n' % bpv) else: f.write(' null,\n') f.write(' };\n') f.write('\n') f.write("\n") f.write(" public static BulkOperation of(PackedInts.Format format, int bitsPerValue) {\n") f.write(" switch (format) {\n") f.write(" case PACKED:\n") f.write(" assert packedBulkOps[bitsPerValue - 1] != null;\n") f.write(" return packedBulkOps[bitsPerValue - 1];\n") f.write(" case PACKED_SINGLE_BLOCK:\n") f.write(" assert packedSingleBlockBulkOps[bitsPerValue - 1] != null;\n") f.write(" return packedSingleBlockBulkOps[bitsPerValue - 1];\n") f.write(" default:\n") f.write(" throw new AssertionError();\n") f.write(" }\n") f.write(" }\n") f.write(FOOTER) f.close()