mirror of https://github.com/apache/lucene.git
SOLR-7787 (jhll integration).
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/solr7787@1691350 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
9342ddc392
commit
1842589815
|
@ -73,7 +73,6 @@ com.sun.jersey.version = 1.9
|
|||
/hsqldb/hsqldb = 1.8.0.10
|
||||
/io.airlift/slice = 0.10
|
||||
/io.netty/netty = 3.7.0.Final
|
||||
/it.unimi.dsi/fastutil = 6.5.11
|
||||
/jakarta-regexp/jakarta-regexp = 1.4
|
||||
/javax.activation/activation = 1.1.1
|
||||
/javax.inject/javax.inject= 1
|
||||
|
@ -85,7 +84,6 @@ com.sun.jersey.version = 1.9
|
|||
/log4j/log4j = 1.2.17
|
||||
/mecab/mecab-ipadic = 2.7.0-20070801
|
||||
/mecab/mecab-naist-jdic = 0.6.3b-20111013
|
||||
/net.agkn/hll = 1.6.0
|
||||
/net.arnx/jsonic = 1.2.7
|
||||
/net.sf.ehcache/ehcache-core = 2.4.4
|
||||
/net.sf.saxon/Saxon-HE = 9.6.0-2
|
||||
|
|
|
@ -13,6 +13,9 @@ including, but not limited to:
|
|||
- Apache Blur
|
||||
- Apache Hadoop
|
||||
|
||||
This product includes code forked from the Java-HLL library.
|
||||
Copyright (c) 2013 Aggregate Knowledge, Inc., https://github.com/aggregateknowledge/java-hll/
|
||||
|
||||
This product includes the JQuery JavaScript library created by John Resig.
|
||||
Copyright (c) 2010 John Resig, http://jquery.com/
|
||||
|
||||
|
|
|
@ -134,10 +134,6 @@
|
|||
<dependency org="org.antlr" name="antlr4-runtime" rev="${/org.antlr/antlr4-runtime}"/>
|
||||
<dependency org="io.airlift" name="slice" rev="${/io.airlift/slice}"/>
|
||||
|
||||
<!-- StatsComponents HLL Dependencies-->
|
||||
<dependency org="net.agkn" name="hll" rev="${/net.agkn/hll}" conf="compile->*"/>
|
||||
<dependency org="it.unimi.dsi" name="fastutil" rev="${/it.unimi.dsi/fastutil}" conf="compile->*"/>
|
||||
|
||||
<exclude org="*" ext="*" matcher="regexp" type="${ivy.exclude.types}"/>
|
||||
</dependencies>
|
||||
</ivy-module>
|
||||
|
|
|
@ -55,9 +55,9 @@ import org.apache.solr.search.QParserPlugin;
|
|||
import org.apache.solr.search.QueryParsing;
|
||||
import org.apache.solr.search.SolrIndexSearcher;
|
||||
import org.apache.solr.search.SyntaxError;
|
||||
import org.apache.solr.util.hll.HLL;
|
||||
import org.apache.solr.util.hll.HLLType;
|
||||
|
||||
import net.agkn.hll.HLL;
|
||||
import net.agkn.hll.HLLType;
|
||||
import com.google.common.hash.Hashing;
|
||||
import com.google.common.hash.HashFunction;
|
||||
|
||||
|
@ -625,8 +625,8 @@ public class StatsField {
|
|||
* Creates an HllOptions based on the (local) params specified (if appropriate).
|
||||
*
|
||||
* @param localParams the LocalParams for this {@link StatsField}
|
||||
* @param field the field corrisponding to this {@link StatsField}, may be null if these stats are over a value source
|
||||
* @return the {@link HllOptions} to use basd on the params, or null if no {@link HLL} should be computed
|
||||
* @param field the field corresponding to this {@link StatsField}, may be null if these stats are over a value source
|
||||
* @return the {@link HllOptions} to use based on the params, or null if no {@link HLL} should be computed
|
||||
* @throws SolrException if there are invalid options
|
||||
*/
|
||||
public static HllOptions parseHllOptions(SolrParams localParams, SchemaField field)
|
||||
|
|
|
@ -33,12 +33,12 @@ import org.apache.solr.handler.component.StatsField.Stat;
|
|||
import org.apache.solr.schema.*;
|
||||
|
||||
import com.tdunning.math.stats.AVLTreeDigest;
|
||||
|
||||
import net.agkn.hll.HLL;
|
||||
import net.agkn.hll.HLLType;
|
||||
import com.google.common.hash.Hashing;
|
||||
import com.google.common.hash.HashFunction;
|
||||
|
||||
import org.apache.solr.util.hll.HLL;
|
||||
import org.apache.solr.util.hll.HLLType;
|
||||
|
||||
/**
|
||||
* Factory class for creating instance of
|
||||
* {@link org.apache.solr.handler.component.StatsValues}
|
||||
|
|
|
@ -23,8 +23,8 @@ import java.util.HashSet;
|
|||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
import net.agkn.hll.HLL;
|
||||
import net.agkn.hll.HLLType;
|
||||
import org.apache.solr.util.hll.HLL;
|
||||
import org.apache.solr.util.hll.HLLType;
|
||||
import org.apache.lucene.index.DocValues;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.NumericDocValues;
|
||||
|
|
|
@ -21,7 +21,7 @@ import java.io.IOException;
|
|||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import net.agkn.hll.HLL;
|
||||
import org.apache.solr.util.hll.HLL;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.MultiDocValues;
|
||||
import org.apache.lucene.index.SortedDocValues;
|
||||
|
|
|
@ -0,0 +1,173 @@
|
|||
package org.apache.solr.util.hll;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* A corresponding deserializer for {@link BigEndianAscendingWordSerializer}.
|
||||
*/
|
||||
class BigEndianAscendingWordDeserializer implements IWordDeserializer {
|
||||
// The number of bits per byte.
|
||||
private static final int BITS_PER_BYTE = 8;
|
||||
|
||||
// long mask for the maximum value stored in a byte
|
||||
private static final long BYTE_MASK = (1L << BITS_PER_BYTE) - 1L;
|
||||
|
||||
// ************************************************************************
|
||||
// The length in bits of the words to be read.
|
||||
private final int wordLength;
|
||||
|
||||
// The byte array to which the words are serialized.
|
||||
private final byte[] bytes;
|
||||
|
||||
// The number of leading padding bytes in 'bytes' to be ignored.
|
||||
private final int bytePadding;
|
||||
|
||||
// The number of words that the byte array contains.
|
||||
private final int wordCount;
|
||||
|
||||
// The current read state.
|
||||
private int currentWordIndex;
|
||||
|
||||
// ========================================================================
|
||||
/**
|
||||
* @param wordLength the length in bits of the words to be deserialized. Must
|
||||
* be less than or equal to 64 and greater than or equal to 1.
|
||||
* @param bytePadding the number of leading bytes that pad the serialized words.
|
||||
* Must be greater than or equal to zero.
|
||||
* @param bytes the byte array containing the serialized words. Cannot be
|
||||
* <code>null</code>.
|
||||
*/
|
||||
public BigEndianAscendingWordDeserializer(final int wordLength, final int bytePadding, final byte[] bytes) {
|
||||
if((wordLength < 1) || (wordLength > 64)) {
|
||||
throw new IllegalArgumentException("Word length must be >= 1 and <= 64. (was: " + wordLength + ")");
|
||||
}
|
||||
|
||||
if(bytePadding < 0) {
|
||||
throw new IllegalArgumentException("Byte padding must be >= zero. (was: " + bytePadding + ")");
|
||||
}
|
||||
|
||||
this.wordLength = wordLength;
|
||||
this.bytes = bytes;
|
||||
this.bytePadding = bytePadding;
|
||||
|
||||
final int dataBytes = (bytes.length - bytePadding);
|
||||
final long dataBits = (dataBytes * BITS_PER_BYTE);
|
||||
|
||||
this.wordCount = (int)(dataBits/wordLength);
|
||||
|
||||
currentWordIndex = 0;
|
||||
}
|
||||
|
||||
// ========================================================================
|
||||
/* (non-Javadoc)
|
||||
* @see net.agkn.hll.serialization.IWordDeserializer#readWord()
|
||||
*/
|
||||
@Override
|
||||
public long readWord() {
|
||||
final long word = readWord(currentWordIndex);
|
||||
currentWordIndex++;
|
||||
|
||||
return word;
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
/**
|
||||
* Reads the word at the specified sequence position (zero-indexed).
|
||||
*
|
||||
* @param position the zero-indexed position of the word to be read. This
|
||||
* must be greater than or equal to zero.
|
||||
* @return the value of the serialized word at the specified position.
|
||||
*/
|
||||
private long readWord(final int position) {
|
||||
if(position < 0) {
|
||||
throw new ArrayIndexOutOfBoundsException(position);
|
||||
}
|
||||
|
||||
// First bit of the word
|
||||
final long firstBitIndex = (position * wordLength);
|
||||
final int firstByteIndex = (bytePadding + (int)(firstBitIndex / BITS_PER_BYTE));
|
||||
final int firstByteSkipBits = (int)(firstBitIndex % BITS_PER_BYTE);
|
||||
|
||||
// Last bit of the word
|
||||
final long lastBitIndex = (firstBitIndex + wordLength - 1);
|
||||
final int lastByteIndex = (bytePadding + (int)(lastBitIndex / BITS_PER_BYTE));
|
||||
final int lastByteBitsToConsume;
|
||||
|
||||
final int bitsAfterByteBoundary = (int)((lastBitIndex + 1) % BITS_PER_BYTE);
|
||||
// If the word terminates at the end of the last byte, consume the whole
|
||||
// last byte.
|
||||
if(bitsAfterByteBoundary == 0) {
|
||||
lastByteBitsToConsume = BITS_PER_BYTE;
|
||||
} else {
|
||||
// Otherwise, only consume what is necessary.
|
||||
lastByteBitsToConsume = bitsAfterByteBoundary;
|
||||
}
|
||||
|
||||
if(lastByteIndex >= bytes.length) {
|
||||
throw new ArrayIndexOutOfBoundsException("Word out of bounds of backing array.");
|
||||
}
|
||||
|
||||
// Accumulator
|
||||
long value = 0;
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// First byte
|
||||
final int bitsRemainingInFirstByte = (BITS_PER_BYTE - firstByteSkipBits);
|
||||
final int bitsToConsumeInFirstByte = Math.min(bitsRemainingInFirstByte, wordLength);
|
||||
long firstByte = (long)bytes[firstByteIndex];
|
||||
|
||||
// Mask off the bits to skip in the first byte.
|
||||
final long firstByteMask = ((1L << bitsRemainingInFirstByte) - 1L);
|
||||
firstByte &= firstByteMask;
|
||||
// Right-align relevant bits of first byte.
|
||||
firstByte >>>= (bitsRemainingInFirstByte - bitsToConsumeInFirstByte);
|
||||
|
||||
value |= firstByte;
|
||||
|
||||
// If the first byte contains the whole word, short-circuit.
|
||||
if(firstByteIndex == lastByteIndex) {
|
||||
return value;
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// Middle bytes
|
||||
final int middleByteCount = (lastByteIndex - firstByteIndex - 1);
|
||||
for(int i=0; i<middleByteCount; i++) {
|
||||
final long middleByte = (bytes[firstByteIndex + i + 1] & BYTE_MASK);
|
||||
// Push middle byte onto accumulator.
|
||||
value <<= BITS_PER_BYTE;
|
||||
value |= middleByte;
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// Last byte
|
||||
long lastByte = (bytes[lastByteIndex] & BYTE_MASK);
|
||||
lastByte >>= (BITS_PER_BYTE - lastByteBitsToConsume);
|
||||
value <<= lastByteBitsToConsume;
|
||||
value |= lastByte;
|
||||
return value;
|
||||
}
|
||||
|
||||
/* (non-Javadoc)
|
||||
* @see net.agkn.hll.serialization.IWordDeserializer#totalWordCount()
|
||||
*/
|
||||
@Override
|
||||
public int totalWordCount() {
|
||||
return wordCount;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,174 @@
|
|||
package org.apache.solr.util.hll;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* A serializer that writes a sequence of fixed bit-width 'words' to a byte array.
|
||||
* Bitwise OR is used to write words into bytes, so a low bit in a word is also
|
||||
* a low bit in a byte. However, a high byte in a word is written at a lower index
|
||||
* in the array than a low byte in a word. The first word is written at the lowest
|
||||
* array index. Each serializer is one time use and returns its backing byte
|
||||
* array.<p/>
|
||||
*
|
||||
* This encoding was chosen so that when reading bytes as octets in the typical
|
||||
* first-octet-is-the-high-nibble fashion, an octet-to-binary conversion
|
||||
* would yield a high-to-low, left-to-right view of the "short words".<p/>
|
||||
*
|
||||
* Example:<p/>
|
||||
*
|
||||
* Say short words are 5 bits wide. Our word sequence is the values
|
||||
* <code>[31, 1, 5]</code>. In big-endian binary format, the values are
|
||||
* <code>[0b11111, 0b00001, 0b00101]</code>. We use 15 of 16 bits in two bytes
|
||||
* and pad the last (lowest) bit of the last byte with a zero:
|
||||
*
|
||||
* <code>
|
||||
* [0b11111000, 0b01001010] = [0xF8, 0x4A]
|
||||
* </code>.
|
||||
*/
|
||||
class BigEndianAscendingWordSerializer implements IWordSerializer {
|
||||
// The number of bits per byte.
|
||||
private static final int BITS_PER_BYTE = 8;
|
||||
|
||||
// ************************************************************************
|
||||
// The length in bits of the words to be written.
|
||||
private final int wordLength;
|
||||
// The number of words to be written.
|
||||
private final int wordCount;
|
||||
|
||||
// The byte array to which the words are serialized.
|
||||
private final byte[] bytes;
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// Write state
|
||||
// Number of bits that remain writable in the current byte.
|
||||
private int bitsLeftInByte;
|
||||
// Index of byte currently being written to.
|
||||
private int byteIndex;
|
||||
// Number of words written.
|
||||
private int wordsWritten;
|
||||
|
||||
// ========================================================================
|
||||
/**
|
||||
* @param wordLength the length in bits of the words to be serialized. Must
|
||||
* be greater than or equal to 1 and less than or equal to 64.
|
||||
* @param wordCount the number of words to be serialized. Must be greater than
|
||||
* or equal to zero.
|
||||
* @param bytePadding the number of leading bytes that should pad the
|
||||
* serialized words. Must be greater than or equal to zero.
|
||||
*/
|
||||
public BigEndianAscendingWordSerializer(final int wordLength, final int wordCount, final int bytePadding) {
|
||||
if((wordLength < 1) || (wordLength > 64)) {
|
||||
throw new IllegalArgumentException("Word length must be >= 1 and <= 64. (was: " + wordLength + ")");
|
||||
}
|
||||
if(wordCount < 0) {
|
||||
throw new IllegalArgumentException("Word count must be >= 0. (was: " + wordCount + ")");
|
||||
}
|
||||
if(bytePadding < 0) {
|
||||
throw new IllegalArgumentException("Byte padding must be must be >= 0. (was: " + bytePadding + ")");
|
||||
}
|
||||
|
||||
this.wordLength = wordLength;
|
||||
this.wordCount = wordCount;
|
||||
|
||||
final long bitsRequired = (wordLength * wordCount);
|
||||
final boolean leftoverBits = ((bitsRequired % BITS_PER_BYTE) != 0);
|
||||
final int bytesRequired = (int)(bitsRequired / BITS_PER_BYTE) + (leftoverBits ? 1 : 0) + bytePadding;
|
||||
bytes = new byte[bytesRequired];
|
||||
|
||||
bitsLeftInByte = BITS_PER_BYTE;
|
||||
byteIndex = bytePadding;
|
||||
wordsWritten = 0;
|
||||
}
|
||||
|
||||
/* (non-Javadoc)
|
||||
* @see net.agkn.hll.serialization.IWordSerializer#writeWord(long)
|
||||
* @throws RuntimeException if the number of words written is greater than the
|
||||
* <code>wordCount</code> parameter in the constructor.
|
||||
*/
|
||||
@Override
|
||||
public void writeWord(final long word) {
|
||||
if(wordsWritten == wordCount) {
|
||||
throw new RuntimeException("Cannot write more words, backing array full!");
|
||||
}
|
||||
|
||||
int bitsLeftInWord = wordLength;
|
||||
|
||||
while(bitsLeftInWord > 0) {
|
||||
// Move to the next byte if the current one is fully packed.
|
||||
if(bitsLeftInByte == 0) {
|
||||
byteIndex++;
|
||||
bitsLeftInByte = BITS_PER_BYTE;
|
||||
}
|
||||
|
||||
final long consumedMask;
|
||||
if(bitsLeftInWord == 64) {
|
||||
consumedMask = ~0L;
|
||||
} else {
|
||||
consumedMask = ((1L << bitsLeftInWord) - 1L);
|
||||
}
|
||||
|
||||
// Fix how many bits will be written in this cycle. Choose the
|
||||
// smaller of the remaining bits in the word or byte.
|
||||
final int numberOfBitsToWrite = Math.min(bitsLeftInByte, bitsLeftInWord);
|
||||
final int bitsInByteRemainingAfterWrite = (bitsLeftInByte - numberOfBitsToWrite);
|
||||
|
||||
// In general, we write the highest bits of the word first, so we
|
||||
// strip the highest bits that were consumed in previous cycles.
|
||||
final long remainingBitsOfWordToWrite = (word & consumedMask);
|
||||
|
||||
final long bitsThatTheByteCanAccept;
|
||||
// If there is more left in the word than can be written to this
|
||||
// byte, shift off the bits that can't be written off the bottom.
|
||||
if(bitsLeftInWord > numberOfBitsToWrite) {
|
||||
bitsThatTheByteCanAccept = (remainingBitsOfWordToWrite >>> (bitsLeftInWord - bitsLeftInByte));
|
||||
} else {
|
||||
// If the byte can accept all remaining bits, there is no need
|
||||
// to shift off the bits that won't be written in this cycle.
|
||||
bitsThatTheByteCanAccept = remainingBitsOfWordToWrite;
|
||||
}
|
||||
|
||||
// Align the word bits to write up against the byte bits that have
|
||||
// already been written. This shift may do nothing if the remainder
|
||||
// of the byte is being consumed in this cycle.
|
||||
final long alignedBits = (bitsThatTheByteCanAccept << bitsInByteRemainingAfterWrite);
|
||||
|
||||
// Update the byte with the alignedBits.
|
||||
bytes[byteIndex] |= (byte)alignedBits;
|
||||
|
||||
// Update state with bit count written.
|
||||
bitsLeftInWord -= numberOfBitsToWrite;
|
||||
bitsLeftInByte = bitsInByteRemainingAfterWrite;
|
||||
}
|
||||
|
||||
wordsWritten ++;
|
||||
}
|
||||
|
||||
/* (non-Javadoc)
|
||||
* @see net.agkn.hll.serialization.IWordSerializer#getBytes()
|
||||
* @throws RuntimeException if the number of words written is fewer than the
|
||||
* <code>wordCount</code> parameter in the constructor.
|
||||
*/
|
||||
@Override
|
||||
public byte[] getBytes() {
|
||||
if(wordsWritten < wordCount) {
|
||||
throw new RuntimeException("Not all words have been written! (" + wordsWritten + "/" + wordCount + ")");
|
||||
}
|
||||
|
||||
return bytes;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,71 @@
|
|||
package org.apache.solr.util.hll;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
 * A collection of bit utilities.
 */
class BitUtil {
  /** Static utility holder; not meant to be instantiated. */
  private BitUtil() {}

  /**
   * Computes the least-significant bit of the specified <code>long</code>
   * that is set to <code>1</code>. Zero-indexed.
   *
   * The position is obtained from {@link Long#numberOfTrailingZeros(long)}
   * rather than a hand-maintained per-byte lookup table; the only difference
   * in contract is the zero input, which is mapped to <code>-1</code> here
   * (the JDK method would report <code>64</code>).
   *
   * @param  value the <code>long</code> whose least-significant bit is desired.
   * @return the least-significant bit of the specified <code>long</code>.
   *         <code>-1</code> is returned if there are no bits set.
   */
  public static int leastSignificantBit(final long value) {
    if(value == 0L) return -1/*by contract*/;
    return Long.numberOfTrailingZeros(value);
  }
}
|
|
@ -0,0 +1,259 @@
|
|||
package org.apache.solr.util.hll;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* A vector (array) of bits that is accessed in units ("registers") of <code>width</code>
|
||||
* bits which are stored as 64bit "words" (<code>long</code>s). In this context
|
||||
* a register is at most 63 bits wide (the limit documented on the constructor).
|
||||
*/
|
||||
class BitVector implements Cloneable {
|
||||
// NOTE: in this context, a word is 64bits
|
||||
|
||||
// rather than doing division to determine how a bit index fits into 64bit
|
||||
// words (i.e. longs), bit shifting is used
|
||||
private static final int LOG2_BITS_PER_WORD = 6/*=>64bits*/;
|
||||
private static final int BITS_PER_WORD = 1 << LOG2_BITS_PER_WORD;
|
||||
private static final int BITS_PER_WORD_MASK = BITS_PER_WORD - 1;
|
||||
|
||||
// ditto from above but for bytes (for output)
|
||||
private static final int LOG2_BITS_PER_BYTE = 3/*=>8bits*/;
|
||||
public static final int BITS_PER_BYTE = 1 << LOG2_BITS_PER_BYTE;
|
||||
|
||||
// ========================================================================
|
||||
public static final int BYTES_PER_WORD = 8/*8 bytes in a long*/;
|
||||
|
||||
// ************************************************************************
|
||||
// 64bit words
|
||||
private final long[] words;
|
||||
public final long[] words() { return words; }
|
||||
public final int wordCount() { return words.length; }
|
||||
public final int byteCount() { return wordCount() * BYTES_PER_WORD; }
|
||||
|
||||
// the width of a register in bits (this cannot be more than 64 (the word size))
|
||||
private final int registerWidth;
|
||||
public final int registerWidth() { return registerWidth; }
|
||||
|
||||
private final long count;
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
private final long registerMask;
|
||||
|
||||
// ========================================================================
|
||||
/**
|
||||
* @param width the width of each register. This cannot be negative or
|
||||
* zero or greater than 63 (the signed word size).
|
||||
* @param count the number of registers. This cannot be negative or zero
|
||||
*/
|
||||
public BitVector(final int width, final long count) {
|
||||
// ceil((width * count)/BITS_PER_WORD)
|
||||
this.words = new long[(int)(((width * count) + BITS_PER_WORD_MASK) >>> LOG2_BITS_PER_WORD)];
|
||||
this.registerWidth = width;
|
||||
this.count = count;
|
||||
|
||||
this.registerMask = (1L << width) - 1;
|
||||
}
|
||||
|
||||
// ========================================================================
|
||||
/**
|
||||
* @param registerIndex the index of the register whose value is to be
|
||||
* retrieved. This cannot be negative.
|
||||
* @return the value at the specified register index
|
||||
* @see #setRegister(long, long)
|
||||
* @see #setMaxRegister(long, long)
|
||||
*/
|
||||
// NOTE: if this changes then setMaxRegister() must change
|
||||
public long getRegister(final long registerIndex) {
|
||||
final long bitIndex = registerIndex * registerWidth;
|
||||
final int firstWordIndex = (int)(bitIndex >>> LOG2_BITS_PER_WORD)/*aka (bitIndex / BITS_PER_WORD)*/;
|
||||
final int secondWordIndex = (int)((bitIndex + registerWidth - 1) >>> LOG2_BITS_PER_WORD)/*see above*/;
|
||||
final int bitRemainder = (int)(bitIndex & BITS_PER_WORD_MASK)/*aka (bitIndex % BITS_PER_WORD)*/;
|
||||
|
||||
if(firstWordIndex == secondWordIndex)
|
||||
return ((words[firstWordIndex] >>> bitRemainder) & registerMask);
|
||||
/* else -- register spans words */
|
||||
return (words[firstWordIndex] >>> bitRemainder)/*no need to mask since at top of word*/
|
||||
| (words[secondWordIndex] << (BITS_PER_WORD - bitRemainder)) & registerMask;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param registerIndex the index of the register whose value is to be set.
|
||||
* This cannot be negative
|
||||
* @param value the value to set in the register
|
||||
* @see #getRegister(long)
|
||||
* @see #setMaxRegister(long, long)
|
||||
*/
|
||||
// NOTE: if this changes then setMaxRegister() must change
|
||||
public void setRegister(final long registerIndex, final long value) {
|
||||
final long bitIndex = registerIndex * registerWidth;
|
||||
final int firstWordIndex = (int)(bitIndex >>> LOG2_BITS_PER_WORD)/*aka (bitIndex / BITS_PER_WORD)*/;
|
||||
final int secondWordIndex = (int)((bitIndex + registerWidth - 1) >>> LOG2_BITS_PER_WORD)/*see above*/;
|
||||
final int bitRemainder = (int)(bitIndex & BITS_PER_WORD_MASK)/*aka (bitIndex % BITS_PER_WORD)*/;
|
||||
|
||||
final long words[] = this.words/*for convenience/performance*/;
|
||||
if(firstWordIndex == secondWordIndex) {
|
||||
// clear then set
|
||||
words[firstWordIndex] &= ~(registerMask << bitRemainder);
|
||||
words[firstWordIndex] |= (value << bitRemainder);
|
||||
} else {/*register spans words*/
|
||||
// clear then set each partial word
|
||||
words[firstWordIndex] &= (1L << bitRemainder) - 1;
|
||||
words[firstWordIndex] |= (value << bitRemainder);
|
||||
|
||||
words[secondWordIndex] &= ~(registerMask >>> (BITS_PER_WORD - bitRemainder));
|
||||
words[secondWordIndex] |= (value >>> (BITS_PER_WORD - bitRemainder));
|
||||
}
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
/**
|
||||
* @return a <code>LongIterator</code> for iterating starting at the register
|
||||
* with index zero. This will never be <code>null</code>.
|
||||
*/
|
||||
public LongIterator registerIterator() {
|
||||
return new LongIterator() {
|
||||
final int registerWidth = BitVector.this.registerWidth;
|
||||
final long[] words = BitVector.this.words;
|
||||
final long registerMask = BitVector.this.registerMask;
|
||||
|
||||
// register setup
|
||||
long registerIndex = 0;
|
||||
int wordIndex = 0;
|
||||
int remainingWordBits = BITS_PER_WORD;
|
||||
long word = words[wordIndex];
|
||||
|
||||
@Override public long next() {
|
||||
long register;
|
||||
if(remainingWordBits >= registerWidth) {
|
||||
register = word & registerMask;
|
||||
|
||||
// shift to the next register
|
||||
word >>>= registerWidth;
|
||||
remainingWordBits -= registerWidth;
|
||||
} else { /*insufficient bits remaining in current word*/
|
||||
wordIndex++/*move to the next word*/;
|
||||
|
||||
register = (word | (words[wordIndex] << remainingWordBits)) & registerMask;
|
||||
|
||||
// shift to the next partial register (word)
|
||||
word = words[wordIndex] >>> (registerWidth - remainingWordBits);
|
||||
remainingWordBits += BITS_PER_WORD - registerWidth;
|
||||
}
|
||||
registerIndex++;
|
||||
return register;
|
||||
}
|
||||
|
||||
@Override public boolean hasNext() {
|
||||
return registerIndex < count;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// composite accessors
|
||||
/**
|
||||
* Sets the value of the specified index register if and only if the specified
|
||||
* value is greater than the current value in the register. This is equivalent
|
||||
* to but much more performant than:<p/>
|
||||
*
|
||||
* <pre>vector.setRegister(index, Math.max(vector.getRegister(index), value));</pre>
|
||||
*
|
||||
* @param registerIndex the index of the register whose value is to be set.
|
||||
* This cannot be negative
|
||||
* @param value the value to set in the register if and only if this value
|
||||
* is greater than the current value in the register
|
||||
* @return <code>true</code> if and only if the specified value is greater
|
||||
* than or equal to the current register value. <code>false</code>
|
||||
* otherwise.
|
||||
* @see #getRegister(long)
|
||||
* @see #setRegister(long, long)
|
||||
* @see java.lang.Math#max(long, long)
|
||||
*/
|
||||
// NOTE: if this changes then setRegister() must change
|
||||
public boolean setMaxRegister(final long registerIndex, final long value) {
|
||||
final long bitIndex = registerIndex * registerWidth;
|
||||
final int firstWordIndex = (int)(bitIndex >>> LOG2_BITS_PER_WORD)/*aka (bitIndex / BITS_PER_WORD)*/;
|
||||
final int secondWordIndex = (int)((bitIndex + registerWidth - 1) >>> LOG2_BITS_PER_WORD)/*see above*/;
|
||||
final int bitRemainder = (int)(bitIndex & BITS_PER_WORD_MASK)/*aka (bitIndex % BITS_PER_WORD)*/;
|
||||
|
||||
// NOTE: matches getRegister()
|
||||
final long registerValue;
|
||||
final long words[] = this.words/*for convenience/performance*/;
|
||||
if(firstWordIndex == secondWordIndex)
|
||||
registerValue = ((words[firstWordIndex] >>> bitRemainder) & registerMask);
|
||||
else /*register spans words*/
|
||||
registerValue = (words[firstWordIndex] >>> bitRemainder)/*no need to mask since at top of word*/
|
||||
| (words[secondWordIndex] << (BITS_PER_WORD - bitRemainder)) & registerMask;
|
||||
|
||||
// determine which is the larger and update as necessary
|
||||
if(value > registerValue) {
|
||||
// NOTE: matches setRegister()
|
||||
if(firstWordIndex == secondWordIndex) {
|
||||
// clear then set
|
||||
words[firstWordIndex] &= ~(registerMask << bitRemainder);
|
||||
words[firstWordIndex] |= (value << bitRemainder);
|
||||
} else {/*register spans words*/
|
||||
// clear then set each partial word
|
||||
words[firstWordIndex] &= (1L << bitRemainder) - 1;
|
||||
words[firstWordIndex] |= (value << bitRemainder);
|
||||
|
||||
words[secondWordIndex] &= ~(registerMask >>> (BITS_PER_WORD - bitRemainder));
|
||||
words[secondWordIndex] |= (value >>> (BITS_PER_WORD - bitRemainder));
|
||||
}
|
||||
} /* else -- the register value is greater (or equal) so nothing needs to be done */
|
||||
|
||||
return (value >= registerValue);
|
||||
}
|
||||
|
||||
// ========================================================================
|
||||
/**
|
||||
* Sets every register of this vector to the specified value. This can be used to
|
||||
* clear the vector by specifying <code>0</code>.
|
||||
*
|
||||
* @param value the value to set every register to
|
||||
*/
|
||||
public void fill(final long value) {
|
||||
for(long i=0; i<count; i++) {
|
||||
setRegister(i, value);
|
||||
}
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
/**
|
||||
* Serializes the registers of the vector using the specified serializer.
|
||||
*
|
||||
* @param serializer the serializer to use. This cannot be <code>null</code>.
|
||||
*/
|
||||
public void getRegisterContents(final IWordSerializer serializer) {
|
||||
for(final LongIterator iter = registerIterator(); iter.hasNext();) {
|
||||
serializer.writeWord(iter.next());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a deep copy of this vector.
|
||||
*
|
||||
* @see java.lang.Object#clone()
|
||||
*/
|
||||
@Override
|
||||
public BitVector clone() {
|
||||
final BitVector copy = new BitVector(registerWidth, count);
|
||||
System.arraycopy(words, 0, copy.words, 0, words.length);
|
||||
return copy;
|
||||
}
|
||||
}
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,138 @@
|
|||
package org.apache.solr.util.hll;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* A concrete {@link IHLLMetadata} implemented as a simple struct.
|
||||
*
|
||||
* @author timon
|
||||
*/
|
||||
class HLLMetadata implements IHLLMetadata {
|
||||
private final int schemaVersion;
|
||||
private final HLLType type;
|
||||
private final int registerCountLog2;
|
||||
private final int registerWidth;
|
||||
private final int log2ExplicitCutoff;
|
||||
private final boolean explicitOff;
|
||||
private final boolean explicitAuto;
|
||||
private final boolean sparseEnabled;
|
||||
|
||||
/**
|
||||
* @param schemaVersion the schema version number of the HLL. This must
|
||||
* be greater than or equal to zero.
|
||||
* @param type the {@link HLLType type} of the HLL. This cannot
|
||||
* be <code>null</code>.
|
||||
* @param registerCountLog2 the log-base-2 register count parameter for
|
||||
* probabilistic HLLs. This must be greater than or equal to zero.
|
||||
* @param registerWidth the register width parameter for probabilistic
|
||||
* HLLs. This must be greater than or equal to zero.
|
||||
* @param log2ExplicitCutoff the log-base-2 of the explicit cardinality cutoff,
|
||||
* if it is explicitly defined. (If <code>explicitOff</code> or
|
||||
* <code>explicitAuto</code> is <code>true</code> then this has no
|
||||
* meaning.)
|
||||
* @param explicitOff the flag for 'explicit off'-mode, where the
|
||||
* {@link HLLType#EXPLICIT} representation is not used. Both this and
|
||||
* <code>explicitAuto</code> cannot be <code>true</code> at the same
|
||||
* time.
|
||||
* @param explicitAuto the flag for 'explicit auto'-mode, where the
|
||||
* {@link HLLType#EXPLICIT} representation's promotion cutoff is
|
||||
* determined based on in-memory size automatically. Both this and
|
||||
* <code>explicitOff</code> cannot be <code>true</code> at the same
|
||||
* time.
|
||||
* @param sparseEnabled the flag for 'sparse-enabled'-mode, where the
|
||||
* {@link HLLType#SPARSE} representation is used.
|
||||
*/
|
||||
public HLLMetadata(final int schemaVersion,
|
||||
final HLLType type,
|
||||
final int registerCountLog2,
|
||||
final int registerWidth,
|
||||
final int log2ExplicitCutoff,
|
||||
final boolean explicitOff,
|
||||
final boolean explicitAuto,
|
||||
final boolean sparseEnabled) {
|
||||
this.schemaVersion = schemaVersion;
|
||||
this.type = type;
|
||||
this.registerCountLog2 = registerCountLog2;
|
||||
this.registerWidth = registerWidth;
|
||||
this.log2ExplicitCutoff = log2ExplicitCutoff;
|
||||
this.explicitOff = explicitOff;
|
||||
this.explicitAuto = explicitAuto;
|
||||
this.sparseEnabled = sparseEnabled;
|
||||
}
|
||||
|
||||
/* (non-Javadoc)
|
||||
* @see net.agkn.hll.serialization.IHLLMetadata#schemaVersion()
|
||||
*/
|
||||
@Override
|
||||
public int schemaVersion() { return schemaVersion; }
|
||||
|
||||
/* (non-Javadoc)
|
||||
* @see net.agkn.hll.serialization.IHLLMetadata#HLLType()
|
||||
*/
|
||||
@Override
|
||||
public HLLType HLLType() { return type; }
|
||||
|
||||
/* (non-Javadoc)
|
||||
* @see net.agkn.hll.serialization.IHLLMetadata#registerCountLog2()
|
||||
*/
|
||||
@Override
|
||||
public int registerCountLog2() { return registerCountLog2; }
|
||||
|
||||
/* (non-Javadoc)
|
||||
* @see net.agkn.hll.serialization.IHLLMetadata#registerWidth()
|
||||
*/
|
||||
@Override
|
||||
public int registerWidth() { return registerWidth; }
|
||||
|
||||
/* (non-Javadoc)
|
||||
* @see net.agkn.hll.serialization.IHLLMetadata#log2ExplicitCutoff()
|
||||
*/
|
||||
@Override
|
||||
public int log2ExplicitCutoff() { return log2ExplicitCutoff; }
|
||||
|
||||
/* (non-Javadoc)
|
||||
* @see net.agkn.hll.serialization.IHLLMetadata#explicitOff()
|
||||
*/
|
||||
@Override
|
||||
public boolean explicitOff() {
|
||||
return explicitOff;
|
||||
}
|
||||
|
||||
/* (non-Javadoc)
|
||||
* @see net.agkn.hll.serialization.IHLLMetadata#explicitAuto()
|
||||
* @see net.agkn.hll.serialization.IHLLMetadata#log2ExplicitCutoff()
|
||||
*/
|
||||
@Override
|
||||
public boolean explicitAuto() {
|
||||
return explicitAuto;
|
||||
}
|
||||
|
||||
/* (non-Javadoc)
|
||||
* @see net.agkn.hll.serialization.IHLLMetadata#sparseEnabled()
|
||||
*/
|
||||
@Override
|
||||
public boolean sparseEnabled() { return sparseEnabled; }
|
||||
|
||||
/* (non-Javadoc)
|
||||
* @see java.lang.Object#toString()
|
||||
*/
|
||||
@Override
|
||||
public String toString() {
|
||||
return "<HLLMetadata schemaVersion: " + this.schemaVersion + ", type: " + this.type.toString() + ", registerCountLog2: " + this.registerCountLog2 + ", registerWidth: " + this.registerWidth + ", log2ExplicitCutoff: " + this.log2ExplicitCutoff + ", explicitOff: " + this.explicitOff + ", explicitAuto: " +this.explicitAuto + ">";
|
||||
}
|
||||
}
|
|
@ -0,0 +1,29 @@
|
|||
package org.apache.solr.util.hll;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* The types of algorithm/data structure that {@link HLL} can utilize. For more
|
||||
* information, see the Javadoc for {@link HLL}.
|
||||
*/
|
||||
public enum HLLType {
    // Declaration order is kept unchanged: it fixes ordinal() and the order of values().
    EMPTY, EXPLICIT, SPARSE, FULL
}
|
|
@ -0,0 +1,199 @@
|
|||
package org.apache.solr.util.hll;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Static functions for computing constants and parameters used in the HLL
|
||||
* algorithm.
|
||||
*/
|
||||
final class HLLUtil {
|
||||
/**
|
||||
* Precomputed <code>pwMaxMask</code> values indexed by <code>registerSizeInBits</code>.
|
||||
* Calculated with this formula:
|
||||
* <pre>
|
||||
* int maxRegisterValue = (1 << registerSizeInBits) - 1;
|
||||
* // Mask with all bits set except for (maxRegisterValue - 1) least significant bits (see #addRaw())
|
||||
* return ~((1L << (maxRegisterValue - 1)) - 1);
|
||||
* </pre>
|
||||
*
|
||||
* @see #pwMaxMask(int)
|
||||
*/
|
||||
private static final long[] PW_MASK = {
|
||||
~((1L << (((1 << 0) - 1) - 1)) - 1),
|
||||
~((1L << (((1 << 1) - 1) - 1)) - 1),
|
||||
~((1L << (((1 << 2) - 1) - 1)) - 1),
|
||||
~((1L << (((1 << 3) - 1) - 1)) - 1),
|
||||
~((1L << (((1 << 4) - 1) - 1)) - 1),
|
||||
~((1L << (((1 << 5) - 1) - 1)) - 1),
|
||||
~((1L << (((1 << 6) - 1) - 1)) - 1),
|
||||
~((1L << (((1 << 7) - 1) - 1)) - 1),
|
||||
~((1L << (((1 << 8) - 1) - 1)) - 1)
|
||||
};
|
||||
|
||||
/**
|
||||
* Precomputed <code>twoToL</code> values indexed by a linear combination of
|
||||
* <code>regWidth</code> and <code>log2m</code>.
|
||||
*
|
||||
* The array is one-dimensional and can be accessed by using index
|
||||
* <code>(REG_WIDTH_INDEX_MULTIPLIER * regWidth) + log2m</code>
|
||||
* for <code>regWidth</code> and <code>log2m</code> between the specified
|
||||
* <code>HLL.{MINIMUM,MAXIMUM}_{REGWIDTH,LOG2M}_PARAM</code> constants.
|
||||
*
|
||||
* @see #largeEstimator(int, int, double)
|
||||
* @see #largeEstimatorCutoff(int, int)
|
||||
* @see "<a href='http://research.neustar.biz/2013/01/24/hyperloglog-googles-take-on-engineering-hll/'>Blog post with section on 2^L</a>"
|
||||
*/
|
||||
private static final double[] TWO_TO_L = new double[(HLL.MAXIMUM_REGWIDTH_PARAM + 1) * (HLL.MAXIMUM_LOG2M_PARAM + 1)];
|
||||
|
||||
/**
|
||||
* Spacing constant used to compute offsets into {@link #TWO_TO_L}.
|
||||
*/
|
||||
private static final int REG_WIDTH_INDEX_MULTIPLIER = HLL.MAXIMUM_LOG2M_PARAM + 1;
|
||||
|
||||
static {
|
||||
for(int regWidth = HLL.MINIMUM_REGWIDTH_PARAM; regWidth <= HLL.MAXIMUM_REGWIDTH_PARAM; regWidth++) {
|
||||
for(int log2m = HLL.MINIMUM_LOG2M_PARAM ; log2m <= HLL.MAXIMUM_LOG2M_PARAM; log2m++) {
|
||||
int maxRegisterValue = (1 << regWidth) - 1;
|
||||
|
||||
// Since 1 is added to p(w) in the insertion algorithm, only
|
||||
// (maxRegisterValue - 1) bits are inspected hence the hash
|
||||
// space is one power of two smaller.
|
||||
final int pwBits = (maxRegisterValue - 1);
|
||||
final int totalBits = (pwBits + log2m);
|
||||
final double twoToL = Math.pow(2, totalBits);
|
||||
TWO_TO_L[(REG_WIDTH_INDEX_MULTIPLIER * regWidth) + log2m] = twoToL;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ************************************************************************
|
||||
/**
|
||||
* Computes the bit-width of HLL registers necessary to estimate a set of
|
||||
* the specified cardinality.
|
||||
*
|
||||
* @param expectedUniqueElements an upper bound on the number of unique
|
||||
* elements that are expected. This must be greater than zero.
|
||||
* @return a register size in bits (i.e. <code>log2(log2(n))</code>)
|
||||
*/
|
||||
public static int registerBitSize(final long expectedUniqueElements) {
|
||||
return Math.max(HLL.MINIMUM_REGWIDTH_PARAM,
|
||||
(int)Math.ceil(NumberUtil.log2(NumberUtil.log2(expectedUniqueElements))));
|
||||
}
|
||||
|
||||
// ========================================================================
|
||||
/**
|
||||
* Computes the 'alpha-m-squared' constant used by the HyperLogLog algorithm.
|
||||
*
|
||||
* @param m this must be a power of two, cannot be less than
|
||||
* 16 (2<sup>4</sup>), and cannot be greater than 65536 (2<sup>16</sup>).
|
||||
* @return gamma times <code>registerCount</code> squared where gamma is
|
||||
* based on the value of <code>registerCount</code>.
|
||||
* @throws IllegalArgumentException if <code>registerCount</code> is less
|
||||
* than 16.
|
||||
*/
|
||||
public static double alphaMSquared(final int m) {
|
||||
switch(m) {
|
||||
case 1/*2^0*/:
|
||||
case 2/*2^1*/:
|
||||
case 4/*2^2*/:
|
||||
case 8/*2^3*/:
|
||||
throw new IllegalArgumentException("'m' cannot be less than 16 (" + m + " < 16).");
|
||||
|
||||
case 16/*2^4*/:
|
||||
return 0.673 * m * m;
|
||||
|
||||
case 32/*2^5*/:
|
||||
return 0.697 * m * m;
|
||||
|
||||
case 64/*2^6*/:
|
||||
return 0.709 * m * m;
|
||||
|
||||
default/*>2^6*/:
|
||||
return (0.7213 / (1.0 + 1.079 / m)) * m * m;
|
||||
}
|
||||
}
|
||||
|
||||
// ========================================================================
|
||||
/**
|
||||
* Computes a mask that prevents overflow of HyperLogLog registers.
|
||||
*
|
||||
* @param registerSizeInBits the size of the HLL registers, in bits.
|
||||
* @return mask a <code>long</code> mask to prevent overflow of the registers
|
||||
* @see #registerBitSize(long)
|
||||
*/
|
||||
public static long pwMaxMask(final int registerSizeInBits) {
|
||||
return PW_MASK[registerSizeInBits];
|
||||
}
|
||||
|
||||
// ========================================================================
|
||||
/**
|
||||
* The cutoff for using the "small range correction" formula, in the
|
||||
* HyperLogLog algorithm.
|
||||
*
|
||||
* @param m the number of registers in the HLL. <em>m<em> in the paper.
|
||||
* @return the cutoff for the small range correction.
|
||||
* @see #smallEstimator(int, int)
|
||||
*/
|
||||
public static double smallEstimatorCutoff(final int m) {
|
||||
return ((double)m * 5) / 2;
|
||||
}
|
||||
|
||||
/**
|
||||
* The "small range correction" formula from the HyperLogLog algorithm. Only
|
||||
* appropriate if both the estimator is smaller than <pre>(5/2) * m</pre> and
|
||||
* there are still registers that have the zero value.
|
||||
*
|
||||
* @param m the number of registers in the HLL. <em>m<em> in the paper.
|
||||
* @param numberOfZeroes the number of registers with value zero. <em>V</em>
|
||||
* in the paper.
|
||||
* @return a corrected cardinality estimate.
|
||||
*/
|
||||
public static double smallEstimator(final int m, final int numberOfZeroes) {
|
||||
return m * Math.log((double)m / numberOfZeroes);
|
||||
}
|
||||
|
||||
/**
|
||||
* The cutoff for using the "large range correction" formula, from the
|
||||
* HyperLogLog algorithm, adapted for 64 bit hashes.
|
||||
*
|
||||
* @param log2m log-base-2 of the number of registers in the HLL. <em>b<em> in the paper.
|
||||
* @param registerSizeInBits the size of the HLL registers, in bits.
|
||||
* @return the cutoff for the large range correction.
|
||||
* @see #largeEstimator(int, int, double)
|
||||
* @see "<a href='http://research.neustar.biz/2013/01/24/hyperloglog-googles-take-on-engineering-hll/'>Blog post with section on 64 bit hashes and 'large range correction' cutoff</a>"
|
||||
*/
|
||||
public static double largeEstimatorCutoff(final int log2m, final int registerSizeInBits) {
|
||||
return (TWO_TO_L[(REG_WIDTH_INDEX_MULTIPLIER * registerSizeInBits) + log2m]) / 30.0;
|
||||
}
|
||||
|
||||
/**
|
||||
* The "large range correction" formula from the HyperLogLog algorithm, adapted
|
||||
* for 64 bit hashes. Only appropriate for estimators whose value exceeds
|
||||
* the return of {@link #largeEstimatorCutoff(int, int)}.
|
||||
*
|
||||
* @param log2m log-base-2 of the number of registers in the HLL. <em>b<em> in the paper.
|
||||
* @param registerSizeInBits the size of the HLL registers, in bits.
|
||||
* @param estimator the original estimator ("E" in the paper).
|
||||
* @return a corrected cardinality estimate.
|
||||
* @see "<a href='http://research.neustar.biz/2013/01/24/hyperloglog-googles-take-on-engineering-hll/'>Blog post with section on 64 bit hashes and 'large range correction'</a>"
|
||||
*/
|
||||
public static double largeEstimator(final int log2m, final int registerSizeInBits, final double estimator) {
|
||||
final double twoToL = TWO_TO_L[(REG_WIDTH_INDEX_MULTIPLIER * registerSizeInBits) + log2m];
|
||||
return -1 * twoToL * Math.log(1.0 - (estimator/twoToL));
|
||||
}
|
||||
}
|
|
@ -0,0 +1,71 @@
|
|||
package org.apache.solr.util.hll;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* The metadata and parameters associated with a HLL.
|
||||
*/
|
||||
interface IHLLMetadata {
|
||||
/**
|
||||
* @return the schema version of the HLL. This will never be <code>null</code>.
|
||||
*/
|
||||
int schemaVersion();
|
||||
|
||||
/**
|
||||
* @return the type of the HLL. This will never be <code>null</code>.
|
||||
*/
|
||||
HLLType HLLType();
|
||||
|
||||
/**
|
||||
* @return the log-base-2 of the register count parameter of the HLL. This
|
||||
* will always be greater than or equal to 4 and less than or equal
|
||||
* to 31.
|
||||
*/
|
||||
int registerCountLog2();
|
||||
|
||||
/**
|
||||
* @return the register width parameter of the HLL. This will always be
|
||||
* greater than or equal to 1 and less than or equal to 8.
|
||||
*/
|
||||
int registerWidth();
|
||||
|
||||
/**
|
||||
* @return the log-base-2 of the explicit cutoff cardinality. This will always
|
||||
* be greater than or equal to zero and less than 31, per the specification.
|
||||
*/
|
||||
int log2ExplicitCutoff();
|
||||
|
||||
/**
|
||||
* @return <code>true</code> if the {@link HLLType#EXPLICIT} representation
|
||||
* has been disabled. <code>false</code> otherwise.
|
||||
*/
|
||||
boolean explicitOff();
|
||||
|
||||
/**
|
||||
* @return <code>true</code> if the {@link HLLType#EXPLICIT} representation
|
||||
* cutoff cardinality is set to be automatically chosen,
|
||||
* <code>false</code> otherwise.
|
||||
*/
|
||||
boolean explicitAuto();
|
||||
|
||||
/**
|
||||
* @return <code>true</code> if the {@link HLLType#SPARSE} representation
|
||||
* is enabled.
|
||||
*/
|
||||
boolean sparseEnabled();
|
||||
}
|
|
@ -0,0 +1,87 @@
|
|||
package org.apache.solr.util.hll;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* A serialization schema for HLLs. Reads and writes HLL metadata to
|
||||
* and from <code>byte[]</code> representations.
|
||||
*
|
||||
* @author timon
|
||||
*/
|
||||
interface ISchemaVersion {
|
||||
/**
|
||||
* The number of metadata bytes required for a serialized HLL of the
|
||||
* specified type.
|
||||
*
|
||||
* @param type the type of the serialized HLL
|
||||
* @return the number of padding bytes needed in order to fully accommodate
|
||||
* the needed metadata.
|
||||
*/
|
||||
int paddingBytes(HLLType type);
|
||||
|
||||
/**
|
||||
* Writes metadata bytes to serialized HLL.
|
||||
*
|
||||
* @param bytes the padded data bytes of the HLL
|
||||
* @param metadata the metadata to write to the padding bytes
|
||||
*/
|
||||
void writeMetadata(byte[] bytes, IHLLMetadata metadata);
|
||||
|
||||
/**
|
||||
* Reads the metadata bytes of the serialized HLL.
|
||||
*
|
||||
* @param bytes the serialized HLL
|
||||
* @return the HLL metadata
|
||||
*/
|
||||
IHLLMetadata readMetadata(byte[] bytes);
|
||||
|
||||
/**
|
||||
* Builds an HLL serializer that matches this schema version.
|
||||
*
|
||||
* @param type the HLL type that will be serialized. This cannot be
|
||||
* <code>null</code>.
|
||||
* @param wordLength the length of the 'words' that comprise the data of the
|
||||
* HLL. Words must be at least 5 bits and at most 64 bits long.
|
||||
* @param wordCount the number of 'words' in the HLL's data.
|
||||
* @return a byte array serializer used to serialize a HLL according
|
||||
* to this schema version's specification.
|
||||
* @see #paddingBytes(HLLType)
|
||||
* @see IWordSerializer
|
||||
*/
|
||||
IWordSerializer getSerializer(HLLType type, int wordLength, int wordCount);
|
||||
|
||||
/**
|
||||
* Builds an HLL deserializer that matches this schema version.
|
||||
*
|
||||
* @param type the HLL type that will be deserialized. This cannot be
|
||||
* <code>null</code>.
|
||||
* @param wordLength the length of the 'words' that comprise the data of the
|
||||
* serialized HLL. Words must be at least 5 bits and at most 64
|
||||
* bits long.
|
||||
* @param bytes the serialized HLL to deserialize. This cannot be
|
||||
* <code>null</code>.
|
||||
* @return a byte array deserializer used to deserialize a HLL serialized
|
||||
* according to this schema version's specification.
|
||||
*/
|
||||
IWordDeserializer getDeserializer(HLLType type, int wordLength, byte[] bytes);
|
||||
|
||||
/**
|
||||
* @return the schema version number.
|
||||
*/
|
||||
int schemaVersionNumber();
|
||||
}
|
|
@ -0,0 +1,41 @@
|
|||
package org.apache.solr.util.hll;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Reads 'words' of a fixed width, in sequence, from a byte array.
|
||||
*/
|
||||
public interface IWordDeserializer {
    /**
     * Reads one word and advances past it.
     *
     * @return the next word in the sequence. Should not be called more than
     *         {@link #totalWordCount()} times.
     */
    long readWord();

    /**
     * Returns the number of words that could be encoded in the sequence.
     *
     * <p>NOTE: the sequence that was encoded may be shorter than the value this
     * method returns due to padding issues within bytes. This guarantees
     * only an upper bound on the number of times {@link #readWord()}
     * can be called.
     *
     * @return the maximum number of words that could be read from the sequence.
     */
    int totalWordCount();
}
|
|
@ -0,0 +1,39 @@
|
|||
package org.apache.solr.util.hll;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Writes 'words' of fixed width, in sequence, to a byte array.
|
||||
*/
|
||||
interface IWordSerializer {
    /**
     * Appends a single word to the backing array.
     *
     * @param word the word to write.
     */
    void writeWord(long word);

    /**
     * Exposes the backing array of <code>byte</code>s holding the words
     * serialized so far.
     *
     * @return the serialized words as a <code>byte[]</code>.
     */
    byte[] getBytes();
}
|
|
@ -0,0 +1,35 @@
|
|||
package org.apache.solr.util.hll;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* A <code>long</code>-based iterator. This is not <i>is-a</i> {@link java.util.Iterator}
|
||||
* to prevent autoboxing between <code>Long</code> and <code>long</code>.
|
||||
*/
|
||||
interface LongIterator {
    /**
     * Tells whether another element is available.
     *
     * @return <code>true</code> if and only if there are more elements to
     *         iterate over. <code>false</code> otherwise.
     */
    boolean hasNext();

    /**
     * Fetches the next element as a primitive.
     *
     * @return the next <code>long</code> in the collection.
     */
    long next();
}
|
|
@ -0,0 +1,172 @@
|
|||
package org.apache.solr.util.hll;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* A collection of utilities to work with numbers.
|
||||
*/
|
||||
class NumberUtil {
  // loge(2) (log-base e of 2)
  public static final double LOGE_2 = 0.6931471805599453;

  // ************************************************************************
  /**
   * Computes the <code>log2</code> (log-base-two) of the specified value.
   *
   * @param  value the <code>double</code> for which the <code>log2</code> is
   *         desired.
   * @return the <code>log2</code> of the specified value
   */
  public static double log2(final double value) {
    // REF: http://en.wikipedia.org/wiki/Logarithmic_scale (conversion of bases)
    return Math.log(value) / LOGE_2;
  }

  // ========================================================================
  // the hex characters, indexed by nibble value
  private static final char[] HEX = { '0', '1', '2', '3', '4', '5', '6', '7',
                                      '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };

  // ------------------------------------------------------------------------
  /**
   * Converts the specified array of <code>byte</code>s into a string of
   * upper-case hex characters. Bytes are emitted in array order and each
   * byte produces two characters, high nibble first.
   *
   * @param  bytes the array of <code>byte</code>s that are to be converted.
   *         This cannot be <code>null</code> or empty.
   * @param  offset the offset in <code>bytes</code> at which the bytes will
   *         be taken. This cannot be negative and must be less than
   *         <code>bytes.length</code>.
   * @param  count the number of bytes to be retrieved from the specified array.
   *         This cannot be negative. If greater than <code>bytes.length - offset</code>
   *         then that value is used.
   * @return a string of at most <code>2 * count</code> characters that represents
   *         the selected bytes in hex. This will never be <code>null</code>
   *         though it will be empty if <code>count</code> is zero.
   * @throws IllegalArgumentException if <code>offset</code> is negative or is
   *         greater than or equal to <code>bytes.length</code>, or if
   *         <code>count</code> is negative.
   * @see #fromHex(String, int, int)
   */
  public static String toHex(final byte[] bytes, final int offset, final int count) {
    if(offset >= bytes.length) throw new IllegalArgumentException("Offset is greater than the length (" + offset + " >= " + bytes.length + ").")/*by contract*/;
    if(offset < 0) throw new IllegalArgumentException("Offset is negative (" + offset + ").")/*by contract*/;
    // reject explicitly: a negative count would otherwise surface as a
    // NegativeArraySizeException from the array allocation below
    if(count < 0) throw new IllegalArgumentException("Count is negative (" + count + ").")/*by contract*/;

    final int byteCount = Math.min( (bytes.length - offset), count);
    final int upperBound = byteCount + offset;

    final char[] chars = new char[byteCount * 2/*two chars per byte*/];
    int charIndex = 0;
    for(int i=offset; i<upperBound; i++) {
      final byte value = bytes[i];
      // the masks discard the sign-extension bits introduced when the byte
      // is promoted to an int
      chars[charIndex++] = HEX[(value >>> 4) & 0x0F];
      chars[charIndex++] = HEX[value & 0x0F];
    }

    return new String(chars);
  }

  /**
   * Converts the specified string of hex characters into an array of
   * <code>byte</code>s. Each pair of characters (high nibble first) produces
   * one byte, in string order.
   *
   * @param  string the string of hex characters to be converted into <code>byte</code>s.
   *         This cannot be <code>null</code> or empty.
   * @param  offset the offset in the string at which the characters will be
   *         taken. This cannot be negative and must be less than
   *         <code>string.length()</code>.
   * @param  count the number of characters to be retrieved from the specified
   *         string. This cannot be negative and must be divisible by two
   *         (since there are two characters per <code>byte</code>). If greater
   *         than <code>string.length() - offset</code> then that value is used,
   *         and it too must be divisible by two.
   * @return the array of <code>byte</code>s that were converted from the
   *         specified string (in the specified range). This will never be
   *         <code>null</code> though it will be empty if <code>count</code>
   *         is zero.
   * @throws IllegalArgumentException if <code>offset</code> is negative or is
   *         greater than or equal to <code>string.length()</code>, if
   *         <code>count</code> is negative or not divisible by two, if the
   *         number of characters actually available in the range is not
   *         divisible by two, or if a character in the range is not in
   *         [a-fA-F0-9].
   * @see #toHex(byte[], int, int)
   */
  public static byte[] fromHex(final String string, final int offset, final int count) {
    if(offset >= string.length()) throw new IllegalArgumentException("Offset is greater than the length (" + offset + " >= " + string.length() + ").")/*by contract*/;
    if( (count & 0x01) != 0) throw new IllegalArgumentException("Count is not divisible by two (" + count + ").")/*by contract*/;
    if(offset < 0) throw new IllegalArgumentException("Offset is negative (" + offset + ").")/*by contract*/;
    // reject explicitly: a negative even count would otherwise become a huge
    // positive array size after the unsigned shift below
    if(count < 0) throw new IllegalArgumentException("Count is negative (" + count + ").")/*by contract*/;

    final int charCount = Math.min((string.length() - offset), count);
    // the clamp above can leave a dangling half-byte (e.g. 3 characters
    // available for a count of 4); fail cleanly rather than reading past the
    // end of the string
    if( (charCount & 0x01) != 0) throw new IllegalArgumentException("Number of available characters is not divisible by two (" + charCount + ").")/*by contract*/;
    final int upperBound = offset + charCount;

    final byte[] bytes = new byte[charCount >>> 1/*aka /2*/];
    int byteIndex = 0/*beginning*/;
    for(int i=offset; i<upperBound; i+=2) {
      bytes[byteIndex++] = (byte)(( (digit(string.charAt(i)) << 4)
                                   | digit(string.charAt(i + 1))) & 0xFF);
    }

    return bytes;
  }

  // ------------------------------------------------------------------------
  /**
   * @param  character a hex character to be converted to a <code>byte</code>.
   *         This cannot be a character other than [a-fA-F0-9].
   * @return the value of the specified character. This will be a value <code>0</code>
   *         through <code>15</code>.
   * @throws IllegalArgumentException if the specified character is not in
   *         [a-fA-F0-9]
   */
  private static int digit(final char character) {
    // plain ASCII range checks: only [a-fA-F0-9] are accepted, so other
    // Unicode digits are deliberately rejected
    if(character >= '0' && character <= '9') return character - '0';
    if(character >= 'a' && character <= 'f') return (character - 'a') + 10;
    if(character >= 'A' && character <= 'F') return (character - 'A') + 10;
    throw new IllegalArgumentException("Character is not in [a-fA-F0-9] ('" + character + "').");
  }
}
|
|
@ -0,0 +1,156 @@
|
|||
package org.apache.solr.util.hll;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* A concrete {@link ISchemaVersion} representing schema version one.
|
||||
*
|
||||
* @author timon
|
||||
*/
|
||||
class SchemaVersionOne implements ISchemaVersion {
|
||||
/**
|
||||
* The schema version number for this instance.
|
||||
*/
|
||||
public static final int SCHEMA_VERSION = 1;
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// Version-specific ordinals (array position) for each of the HLL types
|
||||
private static final HLLType[] TYPE_ORDINALS = new HLLType[] {
|
||||
HLLType.EMPTY,
|
||||
HLLType.EXPLICIT,
|
||||
HLLType.SPARSE,
|
||||
HLLType.FULL
|
||||
};
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// number of header bytes for all HLL types
|
||||
private static final int HEADER_BYTE_COUNT = 3;
|
||||
|
||||
// sentinel values from the spec for explicit off and auto
|
||||
private static final int EXPLICIT_OFF = 0;
|
||||
private static final int EXPLICIT_AUTO = 63;
|
||||
|
||||
// ************************************************************************
|
||||
/* (non-Javadoc)
|
||||
* @see net.agkn.hll.serialization.ISchemaVersion#paddingBytes(HLLType)
|
||||
*/
|
||||
@Override
|
||||
public int paddingBytes(final HLLType type) {
|
||||
return HEADER_BYTE_COUNT;
|
||||
}
|
||||
|
||||
/* (non-Javadoc)
|
||||
* @see net.agkn.hll.serialization.ISchemaVersion#writeMetadata(byte[], IHLLMetadata)
|
||||
*/
|
||||
@Override
|
||||
public void writeMetadata(final byte[] bytes, final IHLLMetadata metadata) {
|
||||
final HLLType type = metadata.HLLType();
|
||||
final int typeOrdinal = getOrdinal(type);
|
||||
|
||||
final int explicitCutoffValue;
|
||||
if(metadata.explicitOff()) {
|
||||
explicitCutoffValue = EXPLICIT_OFF;
|
||||
} else if(metadata.explicitAuto()) {
|
||||
explicitCutoffValue = EXPLICIT_AUTO;
|
||||
} else {
|
||||
explicitCutoffValue = metadata.log2ExplicitCutoff() + 1/*per spec*/;
|
||||
}
|
||||
|
||||
bytes[0] = SerializationUtil.packVersionByte(SCHEMA_VERSION, typeOrdinal);
|
||||
bytes[1] = SerializationUtil.packParametersByte(metadata.registerWidth(), metadata.registerCountLog2());
|
||||
bytes[2] = SerializationUtil.packCutoffByte(explicitCutoffValue, metadata.sparseEnabled());
|
||||
}
|
||||
|
||||
/* (non-Javadoc)
|
||||
* @see net.agkn.hll.serialization.ISchemaVersion#readMetadata(byte[])
|
||||
*/
|
||||
@Override
|
||||
public IHLLMetadata readMetadata(final byte[] bytes) {
|
||||
final byte versionByte = bytes[0];
|
||||
final byte parametersByte = bytes[1];
|
||||
final byte cutoffByte = bytes[2];
|
||||
|
||||
final int typeOrdinal = SerializationUtil.typeOrdinal(versionByte);
|
||||
final int explicitCutoffValue = SerializationUtil.explicitCutoff(cutoffByte);
|
||||
final boolean explicitOff = (explicitCutoffValue == EXPLICIT_OFF);
|
||||
final boolean explicitAuto = (explicitCutoffValue == EXPLICIT_AUTO);
|
||||
final int log2ExplicitCutoff = (explicitOff || explicitAuto) ? -1/*sentinel*/ : (explicitCutoffValue - 1/*per spec*/);
|
||||
|
||||
return new HLLMetadata(SCHEMA_VERSION,
|
||||
getType(typeOrdinal),
|
||||
SerializationUtil.registerCountLog2(parametersByte),
|
||||
SerializationUtil.registerWidth(parametersByte),
|
||||
log2ExplicitCutoff,
|
||||
explicitOff,
|
||||
explicitAuto,
|
||||
SerializationUtil.sparseEnabled(cutoffByte));
|
||||
}
|
||||
|
||||
/* (non-Javadoc)
|
||||
* @see net.agkn.hll.serialization.ISchemaVersion#getSerializer(HLLType, int, int)
|
||||
*/
|
||||
@Override
|
||||
public IWordSerializer getSerializer(HLLType type, int wordLength, int wordCount) {
|
||||
return new BigEndianAscendingWordSerializer(wordLength, wordCount, paddingBytes(type));
|
||||
}
|
||||
|
||||
/* (non-Javadoc)
|
||||
* @see net.agkn.hll.serialization.ISchemaVersion#getDeserializer(HLLType, int, byte[])
|
||||
*/
|
||||
@Override
|
||||
public IWordDeserializer getDeserializer(HLLType type, int wordLength, byte[] bytes) {
|
||||
return new BigEndianAscendingWordDeserializer(wordLength, paddingBytes(type), bytes);
|
||||
}
|
||||
|
||||
/* (non-Javadoc)
|
||||
* @see net.agkn.hll.serialization.ISchemaVersion#schemaVersionNumber()
|
||||
*/
|
||||
@Override
|
||||
public int schemaVersionNumber() {
|
||||
return SCHEMA_VERSION;
|
||||
}
|
||||
|
||||
// ========================================================================
|
||||
// Type/Ordinal lookups
|
||||
/**
|
||||
* Gets the ordinal for the specified {@link HLLType}.
|
||||
*
|
||||
* @param type the type whose ordinal is desired
|
||||
* @return the ordinal for the specified type, to be used in the version byte.
|
||||
* This will always be non-negative.
|
||||
*/
|
||||
private static int getOrdinal(final HLLType type) {
|
||||
for(int i=0; i<TYPE_ORDINALS.length; i++) {
|
||||
if(TYPE_ORDINALS[i].equals(type)) return i;
|
||||
}
|
||||
throw new RuntimeException("Unknown HLL type " + type);
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the {@link HLLType} for the specified ordinal.
|
||||
*
|
||||
* @param ordinal the ordinal whose type is desired
|
||||
* @return the type for the specified ordinal. This will never be <code>null</code>.
|
||||
*/
|
||||
private static HLLType getType(final int ordinal) {
|
||||
if((ordinal < 0) || (ordinal >= TYPE_ORDINALS.length)) {
|
||||
throw new IllegalArgumentException("Invalid type ordinal '" + ordinal + "'. Only 0-" + (TYPE_ORDINALS.length - 1) + " inclusive allowed.");
|
||||
}
|
||||
return TYPE_ORDINALS[ordinal];
|
||||
}
|
||||
}
|
|
@ -0,0 +1,277 @@
|
|||
package org.apache.solr.util.hll;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* A collection of constants and utilities for serializing and deserializing
|
||||
* HLLs.
|
||||
*
|
||||
* NOTE: 'package' visibility is used for many methods that only need to be
|
||||
* used by the {@link ISchemaVersion} implementations. The structure of
|
||||
* a serialized HLL's metadata should be opaque to the rest of the
|
||||
* library.
|
||||
*/
|
||||
class SerializationUtil {
|
||||
/**
|
||||
* The number of bits (of the parameters byte) dedicated to encoding the
|
||||
* width of the registers.
|
||||
*/
|
||||
/*package*/ static int REGISTER_WIDTH_BITS = 3;
|
||||
|
||||
/**
|
||||
* A mask to cap the maximum value of the register width.
|
||||
*/
|
||||
/*package*/ static int REGISTER_WIDTH_MASK = (1 << REGISTER_WIDTH_BITS) - 1;
|
||||
|
||||
/**
|
||||
* The number of bits (of the parameters byte) dedicated to encoding
|
||||
* <code>log2(registerCount)</code>.
|
||||
*/
|
||||
/*package*/ static int LOG2_REGISTER_COUNT_BITS = 5;
|
||||
|
||||
/**
|
||||
* A mask to cap the maximum value of <code>log2(registerCount)</code>.
|
||||
*/
|
||||
/*package*/ static int LOG2_REGISTER_COUNT_MASK = (1 << LOG2_REGISTER_COUNT_BITS) - 1;
|
||||
|
||||
/**
|
||||
* The number of bits (of the cutoff byte) dedicated to encoding the
|
||||
* log-base-2 of the explicit cutoff or sentinel values for
|
||||
* 'explicit-disabled' or 'auto'.
|
||||
*/
|
||||
/*package*/ static int EXPLICIT_CUTOFF_BITS = 6;
|
||||
|
||||
/**
|
||||
* A mask to cap the maximum value of the explicit cutoff choice.
|
||||
*/
|
||||
/*package*/ static int EXPLICIT_CUTOFF_MASK = (1 << EXPLICIT_CUTOFF_BITS) - 1;
|
||||
|
||||
/**
|
||||
* Number of bits in a nibble.
|
||||
*/
|
||||
private static int NIBBLE_BITS = 4;
|
||||
|
||||
/**
|
||||
* A mask to cap the maximum value of a nibble.
|
||||
*/
|
||||
private static int NIBBLE_MASK = (1 << NIBBLE_BITS) - 1;
|
||||
|
||||
// ************************************************************************
|
||||
// Serialization utilities
|
||||
|
||||
/**
|
||||
* Schema version one (v1).
|
||||
*/
|
||||
public static ISchemaVersion VERSION_ONE = new SchemaVersionOne();
|
||||
|
||||
/**
|
||||
* The default schema version for serializing HLLs.
|
||||
*/
|
||||
public static ISchemaVersion DEFAULT_SCHEMA_VERSION = VERSION_ONE;
|
||||
|
||||
/**
|
||||
* List of registered schema versions, indexed by their version numbers. If
|
||||
* an entry is <code>null</code>, then no such schema version is registered.
|
||||
* Similarly, registering a new schema version simply entails assigning an
|
||||
* {@link ISchemaVersion} instance to the appropriate index of this array.<p/>
|
||||
*
|
||||
* By default, only {@link SchemaVersionOne} is registered. Note that version
|
||||
* zero will always be reserved for internal (e.g. proprietary, legacy) schema
|
||||
* specifications/implementations and will never be assigned to in by this
|
||||
* library.
|
||||
*/
|
||||
public static ISchemaVersion[] REGISTERED_SCHEMA_VERSIONS = new ISchemaVersion[16];
|
||||
|
||||
static {
|
||||
REGISTERED_SCHEMA_VERSIONS[1] = VERSION_ONE;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param schemaVersionNumber the version number of the {@link ISchemaVersion}
|
||||
* desired. This must be a registered schema version number.
|
||||
* @return The {@link ISchemaVersion} for the given number. This will never
|
||||
* be <code>null</code>.
|
||||
*/
|
||||
public static ISchemaVersion getSchemaVersion(final int schemaVersionNumber) {
|
||||
if(schemaVersionNumber >= REGISTERED_SCHEMA_VERSIONS.length || schemaVersionNumber < 0) {
|
||||
throw new RuntimeException("Invalid schema version number " + schemaVersionNumber);
|
||||
}
|
||||
final ISchemaVersion schemaVersion = REGISTERED_SCHEMA_VERSIONS[schemaVersionNumber];
|
||||
if(schemaVersion == null) {
|
||||
throw new RuntimeException("Unknown schema version number " + schemaVersionNumber);
|
||||
}
|
||||
return schemaVersion;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the appropriate {@link ISchemaVersion schema version} for the specified
|
||||
* serialized HLL.
|
||||
*
|
||||
* @param bytes the serialized HLL whose schema version is desired.
|
||||
* @return the schema version for the specified HLL. This will never
|
||||
* be <code>null</code>.
|
||||
*/
|
||||
public static ISchemaVersion getSchemaVersion(final byte[] bytes) {
|
||||
final byte versionByte = bytes[0];
|
||||
final int schemaVersionNumber = schemaVersion(versionByte);
|
||||
|
||||
return getSchemaVersion(schemaVersionNumber);
|
||||
}
|
||||
|
||||
// ************************************************************************
|
||||
// Package-specific shared helpers
|
||||
|
||||
/**
|
||||
* Generates a byte that encodes the schema version and the type ordinal
|
||||
* of the HLL.
|
||||
*
|
||||
* The top nibble is the schema version and the bottom nibble is the type
|
||||
* ordinal.
|
||||
*
|
||||
* @param schemaVersion the schema version to encode.
|
||||
* @param typeOrdinal the type ordinal of the HLL to encode.
|
||||
* @return the packed version byte
|
||||
*/
|
||||
public static byte packVersionByte(final int schemaVersion, final int typeOrdinal) {
|
||||
return (byte)(((NIBBLE_MASK & schemaVersion) << NIBBLE_BITS) | (NIBBLE_MASK & typeOrdinal));
|
||||
}
|
||||
/**
|
||||
* Generates a byte that encodes the log-base-2 of the explicit cutoff
|
||||
* or sentinel values for 'explicit-disabled' or 'auto', as well as the
|
||||
* boolean indicating whether to use {@link HLLType#SPARSE}
|
||||
* in the promotion hierarchy.
|
||||
*
|
||||
* The top bit is always padding, the second highest bit indicates the
|
||||
* 'sparse-enabled' boolean, and the lowest six bits encode the explicit
|
||||
* cutoff value.
|
||||
*
|
||||
* @param explicitCutoff the explicit cutoff value to encode.
|
||||
* <ul>
|
||||
* <li>
|
||||
* If 'explicit-disabled' is chosen, this value should be <code>0</code>.
|
||||
* </li>
|
||||
* <li>
|
||||
* If 'auto' is chosen, this value should be <code>63</code>.
|
||||
* </li>
|
||||
* <li>
|
||||
* If a cutoff of 2<sup>n</sup> is desired, for <code>0 <= n < 31</code>,
|
||||
* this value should be <code>n + 1</code>.
|
||||
* </li>
|
||||
* </ul>
|
||||
* @param sparseEnabled whether {@link HLLType#SPARSE}
|
||||
* should be used in the promotion hierarchy to improve HLL
|
||||
* storage.
|
||||
*
|
||||
* @return the packed cutoff byte
|
||||
*/
|
||||
public static byte packCutoffByte(final int explicitCutoff, final boolean sparseEnabled) {
|
||||
final int sparseBit = (sparseEnabled ? (1 << EXPLICIT_CUTOFF_BITS) : 0);
|
||||
return (byte)(sparseBit | (EXPLICIT_CUTOFF_MASK & explicitCutoff));
|
||||
}
|
||||
|
||||
/**
|
||||
* Generates a byte that encodes the parameters of a
|
||||
* {@link HLLType#FULL} or {@link HLLType#SPARSE}
|
||||
* HLL.<p/>
|
||||
*
|
||||
* The top 3 bits are used to encode <code>registerWidth - 1</code>
|
||||
* (range of <code>registerWidth</code> is thus 1-9) and the bottom 5
|
||||
* bits are used to encode <code>registerCountLog2</code>
|
||||
* (range of <code>registerCountLog2</code> is thus 0-31).
|
||||
*
|
||||
* @param registerWidth the register width (must be at least 1 and at
|
||||
* most 9)
|
||||
* @param registerCountLog2 the log-base-2 of the register count (must
|
||||
* be at least 0 and at most 31)
|
||||
* @return the packed parameters byte
|
||||
*/
|
||||
public static byte packParametersByte(final int registerWidth, final int registerCountLog2) {
|
||||
final int widthBits = ((registerWidth - 1) & REGISTER_WIDTH_MASK);
|
||||
final int countBits = (registerCountLog2 & LOG2_REGISTER_COUNT_MASK);
|
||||
return (byte)((widthBits << LOG2_REGISTER_COUNT_BITS) | countBits);
|
||||
}
|
||||
|
||||
/**
|
||||
* Extracts the 'sparse-enabled' boolean from the cutoff byte of a serialized
|
||||
* HLL.
|
||||
*
|
||||
* @param cutoffByte the cutoff byte of the serialized HLL
|
||||
* @return the 'sparse-enabled' boolean
|
||||
*/
|
||||
public static boolean sparseEnabled(final byte cutoffByte) {
|
||||
return ((cutoffByte >>> EXPLICIT_CUTOFF_BITS) & 1) == 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Extracts the explicit cutoff value from the cutoff byte of a serialized
|
||||
* HLL.
|
||||
*
|
||||
* @param cutoffByte the cutoff byte of the serialized HLL
|
||||
* @return the explicit cutoff value
|
||||
*/
|
||||
public static int explicitCutoff(final byte cutoffByte) {
|
||||
return (cutoffByte & EXPLICIT_CUTOFF_MASK);
|
||||
}
|
||||
|
||||
/**
|
||||
* Extracts the schema version from the version byte of a serialized
|
||||
* HLL.
|
||||
*
|
||||
* @param versionByte the version byte of the serialized HLL
|
||||
* @return the schema version of the serialized HLL
|
||||
*/
|
||||
public static int schemaVersion(final byte versionByte) {
|
||||
return NIBBLE_MASK & (versionByte >>> NIBBLE_BITS);
|
||||
}
|
||||
|
||||
/**
|
||||
* Extracts the type ordinal from the version byte of a serialized HLL.
|
||||
*
|
||||
* @param versionByte the version byte of the serialized HLL
|
||||
* @return the type ordinal of the serialized HLL
|
||||
*/
|
||||
public static int typeOrdinal(final byte versionByte) {
|
||||
return (versionByte & NIBBLE_MASK);
|
||||
}
|
||||
|
||||
/**
|
||||
* Extracts the register width from the parameters byte of a serialized
|
||||
* {@link HLLType#FULL} HLL.
|
||||
*
|
||||
* @param parametersByte the parameters byte of the serialized HLL
|
||||
* @return the register width of the serialized HLL
|
||||
*
|
||||
* @see #packParametersByte(int, int)
|
||||
*/
|
||||
public static int registerWidth(final byte parametersByte) {
|
||||
return ((parametersByte >>> LOG2_REGISTER_COUNT_BITS) & REGISTER_WIDTH_MASK) + 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Extracts the log2(registerCount) from the parameters byte of a
|
||||
* serialized {@link HLLType#FULL} HLL.
|
||||
*
|
||||
* @param parametersByte the parameters byte of the serialized HLL
|
||||
* @return log2(registerCount) of the serialized HLL
|
||||
*
|
||||
* @see #packParametersByte(int, int)
|
||||
*/
|
||||
public static int registerCountLog2(final byte parametersByte) {
|
||||
return (parametersByte & LOG2_REGISTER_COUNT_MASK);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,24 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* A fork of the <a href="https://github.com/aggregateknowledge/java-hll/">Java-HyperLogLog</a> package, tweaked
|
||||
* not to depend on fastutil and with cleanups to make it lean and clean.
|
||||
*/
|
||||
package org.apache.solr.util.hll;
|
||||
|
||||
|
|
@ -55,9 +55,9 @@ import org.apache.solr.util.AbstractSolrTestCase;
|
|||
|
||||
import org.apache.commons.math3.util.Combinations;
|
||||
import com.tdunning.math.stats.AVLTreeDigest;
|
||||
import net.agkn.hll.HLL;
|
||||
import com.google.common.hash.Hashing;
|
||||
import com.google.common.hash.HashFunction;
|
||||
import com.google.common.hash.HashFunction;
|
||||
import org.apache.solr.util.hll.HLL;
|
||||
|
||||
import org.junit.BeforeClass;
|
||||
|
||||
|
|
|
@ -31,7 +31,7 @@ import org.apache.solr.client.solrj.response.QueryResponse;
|
|||
import org.apache.solr.common.params.SolrParams;
|
||||
import org.apache.solr.common.params.ModifiableSolrParams;
|
||||
|
||||
import net.agkn.hll.HLL;
|
||||
import org.apache.solr.util.hll.HLL;
|
||||
import com.google.common.hash.Hashing;
|
||||
import com.google.common.hash.HashFunction;
|
||||
|
||||
|
|
|
@ -28,7 +28,7 @@ import java.util.Map;
|
|||
import java.util.Random;
|
||||
|
||||
import com.tdunning.math.stats.AVLTreeDigest;
|
||||
import net.agkn.hll.HLL;
|
||||
import org.apache.solr.util.hll.HLL;
|
||||
import org.apache.lucene.queryparser.flexible.standard.processors.NumericQueryNodeProcessor;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.packed.GrowableWriter;
|
||||
|
|
Loading…
Reference in New Issue