mirror of https://github.com/apache/lucene.git
Merge remote-tracking branch 'origin/master'
This commit is contained in: commit ba46eb7be4

Changed paths:
  dev-tools/scripts
  lucene/CHANGES.txt
  lucene/core/src/java/org/apache/lucene/util
  lucene/sandbox/src/java/org/apache/lucene/document
  lucene/sandbox/src/test/org/apache/lucene/document
  solr/CHANGES.txt
  solr/core/src/java/org/apache/solr
  solr/core/src/test/org/apache/solr
  solr/solrj/src/java/org/apache/solr/common/params
dev-tools/scripts/buildAndPushRelease.py
@@ -61,8 +61,12 @@ def getGitRev():
  status = os.popen('git status').read().strip()
  if 'nothing to commit, working directory clean' not in status:
    raise RuntimeError('git clone is dirty:\n\n%s' % status)
  branch = os.popen('git rev-parse --abbrev-ref HEAD').read().strip()
  command = 'git log origin/%s..' % branch
  unpushedCommits = os.popen(command).read().strip()
  if len(unpushedCommits) > 0:
    raise RuntimeError('There are unpushed commits - "%s" output is:\n\n%s' % (command, unpushedCommits))

  # TODO: we should also detect unpushed changes here? Something like "git cherry -v origin/branch_5_5"?
  print('  git clone is clean')
  return os.popen('git rev-parse HEAD').read().strip()

@@ -115,47 +119,6 @@ def prepare(root, version, gpgKeyID, gpgPassword):
  print()
  return rev

def push(version, root, rev, rcNum, username):
  print('Push...')
  dir = 'lucene-solr-%s-RC%d-rev%s' % (version, rcNum, rev)
  s = os.popen('ssh %s@people.apache.org "ls -ld public_html/staging_area/%s" 2>&1' % (username, dir)).read()
  if 'no such file or directory' not in s.lower():
    print(' Remove old dir...')
    run('ssh %s@people.apache.org "chmod -R u+rwX public_html/staging_area/%s; rm -rf public_html/staging_area/%s"' %
        (username, dir, dir))
  run('ssh %s@people.apache.org "mkdir -p public_html/staging_area/%s/lucene public_html/staging_area/%s/solr"' % \
      (username, dir, dir))
  print(' Lucene')
  os.chdir('%s/lucene/dist' % root)
  print(' zip...')
  if os.path.exists('lucene.tar.bz2'):
    os.remove('lucene.tar.bz2')
  run('tar cjf lucene.tar.bz2 *')
  print(' copy...')
  run('scp lucene.tar.bz2 %s@people.apache.org:public_html/staging_area/%s/lucene' % (username, dir))
  print(' unzip...')
  run('ssh %s@people.apache.org "cd public_html/staging_area/%s/lucene; tar xjf lucene.tar.bz2; rm -f lucene.tar.bz2"' % (username, dir))
  os.remove('lucene.tar.bz2')

  print(' Solr')
  os.chdir('%s/solr/package' % root)
  print(' zip...')
  if os.path.exists('solr.tar.bz2'):
    os.remove('solr.tar.bz2')
  run('tar cjf solr.tar.bz2 *')
  print(' copy...')
  run('scp solr.tar.bz2 %s@people.apache.org:public_html/staging_area/%s/solr' % (username, dir))
  print(' unzip...')
  run('ssh %s@people.apache.org "cd public_html/staging_area/%s/solr; tar xjf solr.tar.bz2; rm -f solr.tar.bz2"' % (username, dir))
  os.remove('solr.tar.bz2')

  print(' chmod...')
  run('ssh %s@people.apache.org "chmod -R a+rX-w public_html/staging_area/%s"' % (username, dir))

  print(' done!')
  url = 'http://people.apache.org/~%s/staging_area/%s' % (username, dir)
  return url

def pushLocal(version, root, rev, rcNum, localDir):
  print('Push local [%s]...' % localDir)
  os.makedirs(localDir)

@@ -206,29 +169,23 @@ def read_version(path):
def parse_config():
  epilogue = textwrap.dedent('''
    Example usage for a Release Manager:
    python3.2 -u buildAndPushRelease.py --push-remote mikemccand --sign 6E68DA61 --rc-num 1 /path/to/lucene_solr_4_7
    python3 -u dev-tools/scripts/buildAndPushRelease.py --push-local /tmp/releases/6.0.1 --sign 6E68DA61 --rc-num 1
  ''')
  description = 'Utility to build, push, and test a release.'
  parser = argparse.ArgumentParser(description=description, epilog=epilogue,
                                   formatter_class=argparse.RawDescriptionHelpFormatter)
  parser.add_argument('--no-prepare', dest='prepare', default=True, action='store_false',
                      help='Use the already built release in the provided checkout')
  parser.add_argument('--push-remote', metavar='USERNAME',
                      help='Push the release to people.apache.org for the given user')
  parser.add_argument('--push-local', metavar='PATH',
                      help='Push the release to the local path')
  parser.add_argument('--sign', metavar='KEYID',
                      help='Sign the release with the given gpg key')
  parser.add_argument('--rc-num', metavar='NUM', type=int, default=1,
                      help='Release Candidate number, required')
  parser.add_argument('--smoke-test', metavar='PATH',
                      help='Run the smoker tester on the release in the given directory')
  parser.add_argument('root', metavar='checkout_path',
                      help='Root of SVN checkout for lucene-solr')
                      help='Release Candidate number. Default: 1')
  parser.add_argument('--root', metavar='PATH', default='.',
                      help='Root of Git working tree for lucene-solr. Default: "." (the current directory)')
  config = parser.parse_args()

  if config.push_remote is not None and config.push_local is not None:
    parser.error('Cannot specify --push-remote and --push-local together')
  if not config.prepare and config.sign:
    parser.error('Cannot sign already built release')
  if config.push_local is not None and os.path.exists(config.push_local):

@@ -236,8 +193,13 @@ def parse_config():
  if config.rc_num <= 0:
    parser.error('Release Candidate number must be a positive integer')
  if not os.path.isdir(config.root):
    # TODO: add additional git check to ensure dir is a real lucene-solr checkout
    parser.error('Root path is not a valid lucene-solr checkout')
    parser.error('Root path "%s" is not a directory' % config.root)
  cwd = os.getcwd()
  os.chdir(config.root)
  config.root = os.getcwd() # Absolutize root dir
  if os.system('git rev-parse') or 3 != len([d for d in ('dev-tools','lucene','solr') if os.path.isdir(d)]):
    parser.error('Root path "%s" is not a valid lucene-solr checkout' % config.root)
  os.chdir(cwd)

  config.version = read_version(config.root)
  print('Building version: %s' % config.version)

@@ -251,8 +213,17 @@ def parse_config():
    config.gpg_password = None

  return config

def check_cmdline_tools():  # Fail fast if there are cmdline tool problems
  if os.system('git --version >/dev/null 2>/dev/null'):
    raise RuntimeError('"git --version" returned a non-zero exit code.')
  antVersion = os.popen('ant -version').read().strip()
  if not antVersion.startswith('Apache Ant(TM) version 1.8'):
    raise RuntimeError('ant version is not 1.8.X: "%s"' % antVersion)

def main():
  check_cmdline_tools()

  c = parse_config()

  if c.prepare:

@@ -261,19 +232,17 @@ def main():
    os.chdir(root)
    rev = open('rev.txt', encoding='UTF-8').read()

  if c.push_remote:
    url = push(c.version, c.root, rev, c.rc_num, c.push_remote)
  elif c.push_local:
  if c.push_local:
    url = pushLocal(c.version, c.root, rev, c.rc_num, c.push_local)
  else:
    url = None

  if url is not None:
    print('  URL: %s' % url)
    print('Next set the PYTHON_EXEC env var and you can run the smoker tester:')
    p = re.compile("(.*)\/")
    print('Next run the smoker tester:')
    p = re.compile(".*/")
    m = p.match(sys.argv[0])
    print('  $PYTHON_EXEC %ssmokeTestRelease.py %s' % (m.group(), url))
    print('%s -u %ssmokeTestRelease.py %s' % (sys.executable, m.group(), url))

if __name__ == '__main__':
  try:

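With this change the script prints a ready-to-run smoke-test command using the interpreter that executed the build (sys.executable) instead of asking the user to export a PYTHON_EXEC env var first. An illustrative invocation of the printed command (the URL is whatever push/pushLocal returned, not a value from this commit):

    python3 -u dev-tools/scripts/smokeTestRelease.py <url printed by the script>
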
lucene/CHANGES.txt
@@ -82,8 +82,9 @@ Optimizations
* LUCENE-7237: LRUQueryCache now prefers returning an uncached Scorer than
  waiting on a lock. (Adrien Grand)

* LUCENE-7261, LUCENE-7262, LUCENE-7264: Speed up DocIdSetBuilder (which is used
  by TermsQuery, multi-term queries and point queries). (Adrien Grand)
* LUCENE-7261, LUCENE-7262, LUCENE-7264, LUCENE-7258: Speed up DocIdSetBuilder
  (which is used by TermsQuery, multi-term queries and several point queries).
  (Adrien Grand, Jeff Wartes, David Smiley)

Bug Fixes

lucene/core/src/java/org/apache/lucene/util/DocIdSetBuilder.java
@@ -17,7 +17,9 @@
package org.apache.lucene.util;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.apache.lucene.index.PointValues;
import org.apache.lucene.index.Terms;

@@ -56,13 +58,32 @@ public final class DocIdSetBuilder {
    }
  }

  private class BufferAdder extends BulkAdder {
  private static class Buffer {
    int[] array;
    int length;

    Buffer(int length) {
      this.array = new int[length];
      this.length = 0;
    }

    Buffer(int[] array, int length) {
      this.array = array;
      this.length = length;
    }
  }

  private static class BufferAdder extends BulkAdder {
    final Buffer buffer;

    BufferAdder(Buffer buffer) {
      this.buffer = buffer;
    }

    @Override
    public void add(int doc) {
      buffer[bufferSize++] = doc;
      buffer.array[buffer.length++] = doc;
    }

  }

  private final int maxDoc;

@@ -71,13 +92,13 @@ public final class DocIdSetBuilder {
  final boolean multivalued;
  final double numValuesPerDoc;

  private int[] buffer;
  private int bufferSize;
  private List<Buffer> buffers = new ArrayList<>();
  private int totalAllocated; // accumulated size of the allocated buffers

  private FixedBitSet bitSet;

  private long counter = -1;
  private BulkAdder adder = new BufferAdder();
  private BulkAdder adder;

  /**
   * Create a builder that can contain doc IDs between {@code 0} and {@code maxDoc}.

@@ -118,67 +139,30 @@ public final class DocIdSetBuilder {
    // of using a full bitset even for quite sparse data
    this.threshold = maxDoc >>> 7;

    this.buffer = new int[0];
    this.bufferSize = 0;
    this.bitSet = null;
  }

  private void upgradeToBitSet() {
    assert bitSet == null;
    bitSet = new FixedBitSet(maxDoc);
    for (int i = 0; i < bufferSize; ++i) {
      bitSet.set(buffer[i]);
    }
    counter = this.bufferSize;
    this.buffer = null;
    this.bufferSize = 0;
    this.adder = new FixedBitSetAdder(bitSet);
  }

  /** Grows the buffer to at least minSize, but never larger than threshold. */
  private void growBuffer(int minSize) {
    assert minSize < threshold;
    if (buffer.length < minSize) {
      int nextSize = Math.min(threshold, ArrayUtil.oversize(minSize, Integer.BYTES));
      buffer = Arrays.copyOf(buffer, nextSize);
    }
  }

  /**
   * Add the content of the provided {@link DocIdSetIterator} to this builder.
   * NOTE: if you need to build a {@link DocIdSet} out of a single
   * {@link DocIdSetIterator}, you should rather use {@link RoaringDocIdSet.Builder}.
   */
  public void add(DocIdSetIterator iter) throws IOException {
    grow((int) Math.min(Integer.MAX_VALUE, iter.cost()));

    if (bitSet != null) {
      bitSet.or(iter);
    } else {
      while (true) {
        assert buffer.length <= threshold;
        final int end = buffer.length;
        for (int i = bufferSize; i < end; ++i) {
          final int doc = iter.nextDoc();
          if (doc == DocIdSetIterator.NO_MORE_DOCS) {
            bufferSize = i;
            return;
          }
          buffer[bufferSize++] = doc;
        }
        bufferSize = end;

        if (bufferSize + 1 >= threshold) {
          break;
        }

        growBuffer(bufferSize+1);
      }

      upgradeToBitSet();
      for (int doc = iter.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = iter.nextDoc()) {
        bitSet.set(doc);
      return;
      }
    int cost = (int) Math.min(Integer.MAX_VALUE, iter.cost());
    BulkAdder adder = grow(cost);
    for (int i = 0; i < cost; ++i) {
      int doc = iter.nextDoc();
      if (doc == DocIdSetIterator.NO_MORE_DOCS) {
        return;
      }
      adder.add(doc);
    }
    for (int doc = iter.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = iter.nextDoc()) {
      grow(1).add(doc);
    }
  }

@@ -188,9 +172,8 @@ public final class DocIdSetBuilder {
   */
  public BulkAdder grow(int numDocs) {
    if (bitSet == null) {
      final long newLength = (long) bufferSize + numDocs;
      if (newLength < threshold) {
        growBuffer((int) newLength);
      if ((long) totalAllocated + numDocs <= threshold) {
        ensureBufferCapacity(numDocs);
      } else {
        upgradeToBitSet();
        counter += numDocs;

@@ -201,6 +184,131 @@ public final class DocIdSetBuilder {
    return adder;
  }

  private void ensureBufferCapacity(int numDocs) {
    if (buffers.isEmpty()) {
      addBuffer(additionalCapacity(numDocs));
      return;
    }

    Buffer current = buffers.get(buffers.size() - 1);
    if (current.array.length - current.length >= numDocs) {
      // current buffer is large enough
      return;
    }
    if (current.length < current.array.length - (current.array.length >>> 3)) {
      // current buffer is less than 7/8 full, resize rather than waste space
      growBuffer(current, additionalCapacity(numDocs));
    } else {
      addBuffer(additionalCapacity(numDocs));
    }
  }

  private int additionalCapacity(int numDocs) {
    // exponential growth: the new array has a size equal to the sum of what
    // has been allocated so far
    int c = totalAllocated;
    // but is also >= numDocs + 1 so that we can store the next batch of docs
    // (plus an empty slot so that we are more likely to reuse the array in build())
    c = Math.max(numDocs + 1, c);
    // avoid cold starts
    c = Math.max(32, c);
    // do not go beyond the threshold
    c = Math.min(threshold - totalAllocated, c);
    return c;
  }

  private Buffer addBuffer(int len) {
    Buffer buffer = new Buffer(len);
    buffers.add(buffer);
    adder = new BufferAdder(buffer);
    totalAllocated += buffer.array.length;
    return buffer;
  }

  private void growBuffer(Buffer buffer, int additionalCapacity) {
    buffer.array = Arrays.copyOf(buffer.array, buffer.array.length + additionalCapacity);
    totalAllocated += additionalCapacity;
  }

  private void upgradeToBitSet() {
    assert bitSet == null;
    FixedBitSet bitSet = new FixedBitSet(maxDoc);
    long counter = 0;
    for (Buffer buffer : buffers) {
      int[] array = buffer.array;
      int length = buffer.length;
      counter += length;
      for (int i = 0; i < length; ++i) {
        bitSet.set(array[i]);
      }
    }
    this.bitSet = bitSet;
    this.counter = counter;
    this.buffers = null;
    this.adder = new FixedBitSetAdder(bitSet);
  }

  /**
   * Build a {@link DocIdSet} from the accumulated doc IDs.
   */
  public DocIdSet build() {
    try {
      if (bitSet != null) {
        assert counter >= 0;
        final long cost = Math.round(counter / numValuesPerDoc);
        return new BitDocIdSet(bitSet, cost);
      } else {
        Buffer concatenated = concat(buffers);
        LSBRadixSorter sorter = new LSBRadixSorter();
        sorter.sort(PackedInts.bitsRequired(maxDoc - 1), concatenated.array, concatenated.length);
        final int l;
        if (multivalued) {
          l = dedup(concatenated.array, concatenated.length);
        } else {
          assert noDups(concatenated.array, concatenated.length);
          l = concatenated.length;
        }
        assert l <= concatenated.length;
        concatenated.array[l] = DocIdSetIterator.NO_MORE_DOCS;
        return new IntArrayDocIdSet(concatenated.array, l);
      }
    } finally {
      this.buffers = null;
      this.bitSet = null;
    }
  }

  /**
   * Concatenate the buffers in any order, leaving at least one empty slot in
   * the end
   * NOTE: this method might reuse one of the arrays
   */
  private static Buffer concat(List<Buffer> buffers) {
    int totalLength = 0;
    Buffer largestBuffer = null;
    for (Buffer buffer : buffers) {
      totalLength += buffer.length;
      if (largestBuffer == null || buffer.array.length > largestBuffer.array.length) {
        largestBuffer = buffer;
      }
    }
    if (largestBuffer == null) {
      return new Buffer(1);
    }
    int[] docs = largestBuffer.array;
    if (docs.length < totalLength + 1) {
      docs = Arrays.copyOf(docs, totalLength + 1);
    }
    totalLength = largestBuffer.length;
    for (Buffer buffer : buffers) {
      if (buffer != largestBuffer) {
        System.arraycopy(buffer.array, 0, docs, totalLength, buffer.length);
        totalLength += buffer.length;
      }
    }
    return new Buffer(docs, totalLength);
  }

  private static int dedup(int[] arr, int length) {
    if (length == 0) {
      return 0;

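To see how additionalCapacity() grows allocations, a worked trace under assumed values (threshold taken large enough that the final min() never clamps):

    // hypothetical trace of additionalCapacity()
    // totalAllocated = 0,   numDocs = 10  -> max(32, max(10 + 1, 0))   = 32
    // totalAllocated = 32,  numDocs = 100 -> max(32, max(100 + 1, 32)) = 101
    // totalAllocated = 133, numDocs = 100 -> max(32, max(100 + 1, 133)) = 133

Each allocation is thus at least as large as everything allocated so far, giving the exponential growth the comment describes while new doc IDs land in fresh chunks instead of forcing a copy of one ever-growing array.
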
@@ -225,35 +333,4 @@ public final class DocIdSetBuilder {
    return true;
  }

  /**
   * Build a {@link DocIdSet} from the accumulated doc IDs.
   */
  public DocIdSet build() {
    try {
      if (bitSet != null) {
        assert counter >= 0;
        final long cost = Math.round(counter / numValuesPerDoc);
        return new BitDocIdSet(bitSet, cost);
      } else {
        LSBRadixSorter sorter = new LSBRadixSorter();
        sorter.sort(PackedInts.bitsRequired(maxDoc - 1), buffer, bufferSize);
        final int l;
        if (multivalued) {
          l = dedup(buffer, bufferSize);
        } else {
          assert noDups(buffer, bufferSize);
          l = bufferSize;
        }
        assert l <= bufferSize;
        buffer = ArrayUtil.grow(buffer, l + 1);
        buffer[l] = DocIdSetIterator.NO_MORE_DOCS;
        return new IntArrayDocIdSet(buffer, l);
      }
    } finally {
      this.buffer = null;
      this.bufferSize = 0;
      this.bitSet = null;
    }
  }

}

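With this refactoring, callers reserve capacity up front and write through the returned BulkAdder instead of into a single shared int[]. A minimal usage sketch (maxDoc and the doc IDs are illustrative; DocIdSetBuilder, BulkAdder, and build() are the APIs shown in the diff above):

    DocIdSetBuilder builder = new DocIdSetBuilder(maxDoc);
    DocIdSetBuilder.BulkAdder adder = builder.grow(3); // reserve room for the next 3 docs
    adder.add(1);
    adder.add(7);
    adder.add(42);
    DocIdSet set = builder.build(); // sorts and dedups the buffers, or wraps the bitset

Note that grow() may swap the underlying representation to a FixedBitSet once total allocations cross the maxDoc >>> 7 threshold, which is why add(DocIdSetIterator) above re-fetches the adder per batch rather than caching it.
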
lucene/sandbox/src/java/org/apache/lucene/document/HalfFloatPoint.java (new file)
@@ -0,0 +1,425 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.document;

import java.util.Arrays;
import java.util.Collection;

import org.apache.lucene.index.PointValues;
import org.apache.lucene.search.PointInSetQuery;
import org.apache.lucene.search.PointRangeQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.BytesRef;

/**
 * An indexed {@code half-float} field for fast range filters. If you also
 * need to store the value, you should add a separate {@link StoredField} instance.
 * If you need doc values, you can store them in a {@link NumericDocValuesField}
 * and use {@link #halfFloatToSortableShort} and
 * {@link #sortableShortToHalfFloat} for encoding/decoding.
 * <p>
 * The API takes floats, but they will be encoded to half-floats before being
 * indexed. In case the provided floats cannot be represented accurately as a
 * half float, they will be rounded to the closest value that can be
 * represented as a half float. In case of tie, values will be rounded to the
 * value that has a zero as its least significant bit.
 * <p>
 * Finding all documents within an N-dimensional shape or range at search time is
 * efficient. Multiple values for the same field in one document
 * are allowed.
 * <p>
 * This field defines static factory methods for creating common queries:
 * <ul>
 *   <li>{@link #newExactQuery(String, float)} for matching an exact 1D point.
 *   <li>{@link #newSetQuery(String, float...)} for matching a set of 1D values.
 *   <li>{@link #newRangeQuery(String, float, float)} for matching a 1D range.
 *   <li>{@link #newRangeQuery(String, float[], float[])} for matching points/ranges in n-dimensional space.
 * </ul>
 * @see PointValues
 */
public final class HalfFloatPoint extends Field {

  /** The number of bytes used to represent a half-float value. */
  public static final int BYTES = 2;

  /**
   * Return the first half float which is immediately greater than {@code v}.
   * If the argument is {@link Float#NaN} then the return value is
   * {@link Float#NaN}. If the argument is {@link Float#POSITIVE_INFINITY}
   * then the return value is {@link Float#POSITIVE_INFINITY}.
   */
  public static float nextUp(float v) {
    if (Float.isNaN(v) || v == Float.POSITIVE_INFINITY) {
      return v;
    }
    short s = halfFloatToSortableShort(v);
    // if the float does not represent a half float accurately then just
    // converting back might give us the value we are looking for
    float r = sortableShortToHalfFloat(s);
    if (r <= v) {
      r = sortableShortToHalfFloat((short) (s + 1));
    }
    return r;
  }

  /**
   * Return the first half float which is immediately smaller than {@code v}.
   * If the argument is {@link Float#NaN} then the return value is
   * {@link Float#NaN}. If the argument is {@link Float#NEGATIVE_INFINITY}
   * then the return value is {@link Float#NEGATIVE_INFINITY}.
   */
  public static float nextDown(float v) {
    if (Float.isNaN(v) || v == Float.NEGATIVE_INFINITY) {
      return v;
    }
    short s = halfFloatToSortableShort(v);
    // if the float does not represent a half float accurately then just
    // converting back might give us the value we are looking for
    float r = sortableShortToHalfFloat(s);
    if (r >= v) {
      r = sortableShortToHalfFloat((short) (s - 1));
    }
    return r;
  }

  /** Convert a half-float to a short value that maintains ordering. */
  public static short halfFloatToSortableShort(float v) {
    return sortableShortBits(halfFloatToShortBits(v));
  }

  /** Convert short bits to a half-float value that maintains ordering. */
  public static float sortableShortToHalfFloat(short bits) {
    return shortBitsToHalfFloat(sortableShortBits(bits));
  }

  private static short sortableShortBits(short s) {
    return (short) (s ^ (s >> 15) & 0x7fff);
  }

  static short halfFloatToShortBits(float v) {
    int floatBits = Float.floatToIntBits(v);
    int sign = floatBits >>> 31;
    int exp = (floatBits >>> 23) & 0xff;
    int mantissa = floatBits & 0x7fffff;

    if (exp == 0xff) {
      // preserve NaN and Infinity
      exp = 0x1f;
      mantissa >>>= (23 - 10);
    } else if (exp == 0x00) {
      // denormal float rounded to zero since even the largest denormal float
      // cannot be represented as a half float
      mantissa = 0;
    } else {
      exp = exp - 127 + 15;
      if (exp >= 0x1f) {
        // too large, make it infinity
        exp = 0x1f;
        mantissa = 0;
      } else if (exp <= 0) {
        // we need to convert to a denormal representation
        int shift = 23 - 10 - exp + 1;
        if (shift >= 32) {
          // need a special case since shifts are mod 32...
          exp = 0;
          mantissa = 0;
        } else {
          // add the implicit bit
          mantissa |= 0x800000;
          mantissa = roundShift(mantissa, shift);
          exp = mantissa >>> 10;
          mantissa &= 0x3ff;
        }
      } else {
        mantissa = roundShift((exp << 23) | mantissa, 23 - 10);
        exp = mantissa >>> 10;
        mantissa &= 0x3ff;
      }
    }
    return (short) ((sign << 15) | (exp << 10) | mantissa);
  }

  // divide by 2^shift and round to the closest int
  // round to even in case of tie
  static int roundShift(int i, int shift) {
    assert shift > 0;
    i += 1 << (shift - 1); // add 2^(shift-1) so that we round rather than truncate
    i -= (i >>> shift) & 1; // and subtract the shift-th bit so that we round to even in case of tie
    return i >>> shift;
  }

  static float shortBitsToHalfFloat(short s) {
    int sign = s >>> 15;
    int exp = (s >>> 10) & 0x1f;
    int mantissa = s & 0x3ff;
    if (exp == 0x1f) {
      // NaN or infinities
      exp = 0xff;
      mantissa <<= (23 - 10);
    } else if (mantissa == 0 && exp == 0) {
      // zero
    } else {
      if (exp == 0) {
        // denormal half float becomes a normal float
        int shift = Integer.numberOfLeadingZeros(mantissa) - (32 - 11);
        mantissa = (mantissa << shift) & 0x3ff; // clear the implicit bit
        exp = exp - shift + 1;
      }
      exp = exp + 127 - 15;
      mantissa <<= (23 - 10);
    }

    return Float.intBitsToFloat((sign << 31) | (exp << 23) | mantissa);
  }

  static void shortToSortableBytes(short value, byte[] result, int offset) {
    // Flip the sign bit, so negative shorts sort before positive shorts correctly:
    value ^= 0x8000;
    result[offset] = (byte) (value >> 8);
    result[offset+1] = (byte) value;
  }

  static short sortableBytesToShort(byte[] encoded, int offset) {
    short x = (short) (((encoded[offset] & 0xFF) << 8) | (encoded[offset+1] & 0xFF));
    // Re-flip the sign bit to restore the original value:
    return (short) (x ^ 0x8000);
  }

  private static FieldType getType(int numDims) {
    FieldType type = new FieldType();
    type.setDimensions(numDims, BYTES);
    type.freeze();
    return type;
  }

  @Override
  public void setFloatValue(float value) {
    setFloatValues(value);
  }

  /** Change the values of this field */
  public void setFloatValues(float... point) {
    if (type.pointDimensionCount() != point.length) {
      throw new IllegalArgumentException("this field (name=" + name + ") uses " + type.pointDimensionCount() + " dimensions; cannot change to (incoming) " + point.length + " dimensions");
    }
    fieldsData = pack(point);
  }

  @Override
  public void setBytesValue(BytesRef bytes) {
    throw new IllegalArgumentException("cannot change value type from float to BytesRef");
  }

  @Override
  public Number numericValue() {
    if (type.pointDimensionCount() != 1) {
      throw new IllegalStateException("this field (name=" + name + ") uses " + type.pointDimensionCount() + " dimensions; cannot convert to a single numeric value");
    }
    BytesRef bytes = (BytesRef) fieldsData;
    assert bytes.length == BYTES;
    return decodeDimension(bytes.bytes, bytes.offset);
  }

  private static BytesRef pack(float... point) {
    if (point == null) {
      throw new IllegalArgumentException("point must not be null");
    }
    if (point.length == 0) {
      throw new IllegalArgumentException("point must not be 0 dimensions");
    }
    byte[] packed = new byte[point.length * BYTES];

    for (int dim = 0; dim < point.length; dim++) {
      encodeDimension(point[dim], packed, dim * BYTES);
    }

    return new BytesRef(packed);
  }

  /** Creates a new HalfFloatPoint, indexing the
   *  provided N-dimensional float point.
   *
   *  @param name field name
   *  @param point float[] value
   *  @throws IllegalArgumentException if the field name or value is null.
   */
  public HalfFloatPoint(String name, float... point) {
    super(name, pack(point), getType(point.length));
  }

  @Override
  public String toString() {
    StringBuilder result = new StringBuilder();
    result.append(getClass().getSimpleName());
    result.append(" <");
    result.append(name);
    result.append(':');

    BytesRef bytes = (BytesRef) fieldsData;
    for (int dim = 0; dim < type.pointDimensionCount(); dim++) {
      if (dim > 0) {
        result.append(',');
      }
      result.append(decodeDimension(bytes.bytes, bytes.offset + dim * BYTES));
    }

    result.append('>');
    return result.toString();
  }

  // public helper methods (e.g. for queries)

  /** Encode single float dimension */
  public static void encodeDimension(float value, byte dest[], int offset) {
    shortToSortableBytes(halfFloatToSortableShort(value), dest, offset);
  }

  /** Decode single float dimension */
  public static float decodeDimension(byte value[], int offset) {
    return sortableShortToHalfFloat(sortableBytesToShort(value, offset));
  }

  // static methods for generating queries

  /**
   * Create a query for matching an exact half-float value. It will be rounded
   * to the closest half-float if {@code value} cannot be represented accurately
   * as a half-float.
   * <p>
   * This is for simple one-dimension points, for multidimensional points use
   * {@link #newRangeQuery(String, float[], float[])} instead.
   *
   * @param field field name. must not be {@code null}.
   * @param value half-float value
   * @throws IllegalArgumentException if {@code field} is null.
   * @return a query matching documents with this exact value
   */
  public static Query newExactQuery(String field, float value) {
    return newRangeQuery(field, value, value);
  }

  /**
   * Create a range query for half-float values. Bounds will be rounded to the
   * closest half-float if they cannot be represented accurately as a
   * half-float.
   * <p>
   * This is for simple one-dimension ranges, for multidimensional ranges use
   * {@link #newRangeQuery(String, float[], float[])} instead.
   * <p>
   * You can have half-open ranges (which are in fact </≤ or >/≥ queries)
   * by setting {@code lowerValue = Float.NEGATIVE_INFINITY} or {@code upperValue = Float.POSITIVE_INFINITY}.
   * <p> Ranges are inclusive. For exclusive ranges, pass {@code nextUp(lowerValue)}
   * or {@code nextDown(upperValue)}.
   * <p>
   * Range comparisons are consistent with {@link Float#compareTo(Float)}.
   *
   * @param field field name. must not be {@code null}.
   * @param lowerValue lower portion of the range (inclusive).
   * @param upperValue upper portion of the range (inclusive).
   * @throws IllegalArgumentException if {@code field} is null.
   * @return a query matching documents within this range.
   */
  public static Query newRangeQuery(String field, float lowerValue, float upperValue) {
    return newRangeQuery(field, new float[] { lowerValue }, new float[] { upperValue });
  }

  /**
   * Create a range query for n-dimensional half-float values. Bounds will be
   * rounded to the closest half-float if they cannot be represented accurately
   * as a half-float.
   * <p>
   * You can have half-open ranges (which are in fact </≤ or >/≥ queries)
   * by setting {@code lowerValue[i] = Float.NEGATIVE_INFINITY} or {@code upperValue[i] = Float.POSITIVE_INFINITY}.
   * <p> Ranges are inclusive. For exclusive ranges, pass {@code nextUp(lowerValue[i])}
   * or {@code nextDown(upperValue[i])}.
   * <p>
   * Range comparisons are consistent with {@link Float#compareTo(Float)}.
   *
   * @param field field name. must not be {@code null}.
   * @param lowerValue lower portion of the range (inclusive). must not be {@code null}.
   * @param upperValue upper portion of the range (inclusive). must not be {@code null}.
   * @throws IllegalArgumentException if {@code field} is null, if {@code lowerValue} is null, if {@code upperValue} is null,
   *         or if {@code lowerValue.length != upperValue.length}
   * @return a query matching documents within this range.
   */
  public static Query newRangeQuery(String field, float[] lowerValue, float[] upperValue) {
    PointRangeQuery.checkArgs(field, lowerValue, upperValue);
    return new PointRangeQuery(field, pack(lowerValue).bytes, pack(upperValue).bytes, lowerValue.length) {
      @Override
      protected String toString(int dimension, byte[] value) {
        return Float.toString(decodeDimension(value, 0));
      }
    };
  }

  /**
   * Create a query matching any of the specified 1D values.
   * This is the points equivalent of {@code TermsQuery}.
   * Values will be rounded to the closest half-float if they
   * cannot be represented accurately as a half-float.
   *
   * @param field field name. must not be {@code null}.
   * @param values all values to match
   */
  public static Query newSetQuery(String field, float... values) {

    // Don't unexpectedly change the user's incoming values array:
    float[] sortedValues = values.clone();
    Arrays.sort(sortedValues);

    final BytesRef encoded = new BytesRef(new byte[BYTES]);

    return new PointInSetQuery(field, 1, BYTES,
                               new PointInSetQuery.Stream() {

                                 int upto;

                                 @Override
                                 public BytesRef next() {
                                   if (upto == sortedValues.length) {
                                     return null;
                                   } else {
                                     encodeDimension(sortedValues[upto], encoded.bytes, 0);
                                     upto++;
                                     return encoded;
                                   }
                                 }
                               }) {
      @Override
      protected String toString(byte[] value) {
        assert value.length == BYTES;
        return Float.toString(decodeDimension(value, 0));
      }
    };
  }

  /**
   * Create a query matching any of the specified 1D values. This is the points equivalent of {@code TermsQuery}.
   *
   * @param field field name. must not be {@code null}.
   * @param values all values to match
   */
  public static Query newSetQuery(String field, Collection<Float> values) {
    Float[] boxed = values.toArray(new Float[0]);
    float[] unboxed = new float[boxed.length];
    for (int i = 0; i < boxed.length; i++) {
      unboxed[i] = boxed[i];
    }
    return newSetQuery(field, unboxed);
  }

}

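A quick usage sketch of the new field type (the field name and values are illustrative; the classes and factory methods are exactly the ones defined above):

    Document doc = new Document();
    doc.add(new HalfFloatPoint("weight", 1.25f)); // encoded to 2 bytes per dimension
    writer.addDocument(doc);
    // ... later, on an IndexSearcher over this index:
    int hits = searcher.count(HalfFloatPoint.newRangeQuery("weight", 1f, 2f)); // inclusive bounds
    // exclusive lower bound, per the javadoc: pass nextUp(lowerValue)
    Query q = HalfFloatPoint.newRangeQuery("weight", HalfFloatPoint.nextUp(1f), 2f);
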
lucene/sandbox/src/test/org/apache/lucene/document/TestHalfFloatPoint.java (new file)
@@ -0,0 +1,243 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.document;

import java.util.Arrays;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.TestUtil;

public class TestHalfFloatPoint extends LuceneTestCase {

  private void testHalfFloat(String sbits, float value) {
    short bits = (short) Integer.parseInt(sbits, 2);
    float converted = HalfFloatPoint.shortBitsToHalfFloat(bits);
    assertEquals(value, converted, 0f);
    short bits2 = HalfFloatPoint.halfFloatToShortBits(converted);
    assertEquals(bits, bits2);
  }

  public void testHalfFloatConversion() {
    assertEquals(0, HalfFloatPoint.halfFloatToShortBits(0f));
    assertEquals((short)(1 << 15), HalfFloatPoint.halfFloatToShortBits(-0f));
    assertEquals(0, HalfFloatPoint.halfFloatToShortBits(Float.MIN_VALUE)); // rounded to zero

    testHalfFloat("0011110000000000", 1);
    testHalfFloat("0011110000000001", 1.0009765625f);
    testHalfFloat("1100000000000000", -2);
    testHalfFloat("0111101111111111", 65504); // max value
    testHalfFloat("0000010000000000", (float) Math.pow(2, -14)); // minimum positive normal
    testHalfFloat("0000001111111111", (float) (Math.pow(2, -14) - Math.pow(2, -24))); // maximum subnormal
    testHalfFloat("0000000000000001", (float) Math.pow(2, -24)); // minimum positive subnormal
    testHalfFloat("0000000000000000", 0f);
    testHalfFloat("1000000000000000", -0f);
    testHalfFloat("0111110000000000", Float.POSITIVE_INFINITY);
    testHalfFloat("1111110000000000", Float.NEGATIVE_INFINITY);
    testHalfFloat("0111111000000000", Float.NaN);
    testHalfFloat("0011010101010101", 0.333251953125f);
  }

  public void testRoundShift() {
    assertEquals(0, HalfFloatPoint.roundShift(0, 2));
    assertEquals(0, HalfFloatPoint.roundShift(1, 2));
    assertEquals(0, HalfFloatPoint.roundShift(2, 2)); // tie so round to 0 since it ends with a 0
    assertEquals(1, HalfFloatPoint.roundShift(3, 2));
    assertEquals(1, HalfFloatPoint.roundShift(4, 2));
    assertEquals(1, HalfFloatPoint.roundShift(5, 2));
    assertEquals(2, HalfFloatPoint.roundShift(6, 2)); // tie so round to 2 since it ends with a 0
    assertEquals(2, HalfFloatPoint.roundShift(7, 2));
    assertEquals(2, HalfFloatPoint.roundShift(8, 2));
    assertEquals(2, HalfFloatPoint.roundShift(9, 2));
    assertEquals(2, HalfFloatPoint.roundShift(10, 2)); // tie so round to 2 since it ends with a 0
    assertEquals(3, HalfFloatPoint.roundShift(11, 2));
    assertEquals(3, HalfFloatPoint.roundShift(12, 2));
    assertEquals(3, HalfFloatPoint.roundShift(13, 2));
    assertEquals(4, HalfFloatPoint.roundShift(14, 2)); // tie so round to 4 since it ends with a 0
    assertEquals(4, HalfFloatPoint.roundShift(15, 2));
    assertEquals(4, HalfFloatPoint.roundShift(16, 2));
  }

  public void testRounding() {
    float[] values = new float[0];
    int o = 0;
    for (int i = Short.MIN_VALUE; i <= Short.MAX_VALUE; ++i) {
      float v = HalfFloatPoint.sortableShortToHalfFloat((short) i);
      if (Float.isFinite(v)) {
        if (o == values.length) {
          values = ArrayUtil.grow(values);
        }
        values[o++] = v;
      }
    }
    values = Arrays.copyOf(values, o);

    int iters = atLeast(1000000);
    for (int iter = 0; iter < iters; ++iter) {
      float f;
      if (random().nextBoolean()) {
        int floatBits = random().nextInt();
        f = Float.intBitsToFloat(floatBits);
      } else {
        f = (float) ((2 * random().nextFloat() - 1) * Math.pow(2, TestUtil.nextInt(random(), -16, 16)));
      }
      float rounded = HalfFloatPoint.shortBitsToHalfFloat(HalfFloatPoint.halfFloatToShortBits(f));
      if (Float.isFinite(f) == false) {
        assertEquals(Float.floatToIntBits(f), Float.floatToIntBits(rounded), 0f);
      } else if (Float.isFinite(rounded) == false) {
        assertFalse(Float.isNaN(rounded));
        assertTrue(Math.abs(f) > 65520);
      } else {
        int index = Arrays.binarySearch(values, f);
        float closest;
        if (index >= 0) {
          closest = values[index];
        } else {
          index = -1 - index;
          closest = Float.POSITIVE_INFINITY;
          if (index < values.length) {
            closest = values[index];
          }
          if (index - 1 >= 0) {
            if (f - values[index - 1] < closest - f) {
              closest = values[index - 1];
            } else if (f - values[index - 1] == closest - f
                && Integer.numberOfTrailingZeros(Float.floatToIntBits(values[index - 1])) > Integer.numberOfTrailingZeros(Float.floatToIntBits(closest))) {
              // in case of tie, round to even
              closest = values[index - 1];
            }
          }
        }
        assertEquals(closest, rounded, 0f);
      }
    }
  }

  public void testSortableBits() {
    int low = Short.MIN_VALUE;
    int high = Short.MAX_VALUE;
    while (Float.isNaN(HalfFloatPoint.sortableShortToHalfFloat((short) low))) {
      ++low;
    }
    while (HalfFloatPoint.sortableShortToHalfFloat((short) low) == Float.NEGATIVE_INFINITY) {
      ++low;
    }
    while (Float.isNaN(HalfFloatPoint.sortableShortToHalfFloat((short) high))) {
      --high;
    }
    while (HalfFloatPoint.sortableShortToHalfFloat((short) high) == Float.POSITIVE_INFINITY) {
      --high;
    }
    for (int i = low; i <= high + 1; ++i) {
      float previous = HalfFloatPoint.sortableShortToHalfFloat((short) (i - 1));
      float current = HalfFloatPoint.sortableShortToHalfFloat((short) i);
      assertEquals(i, HalfFloatPoint.halfFloatToSortableShort(current));
      assertTrue(Float.compare(previous, current) < 0);
    }
  }

  public void testSortableBytes() {
    for (int i = Short.MIN_VALUE + 1; i <= Short.MAX_VALUE; ++i) {
      byte[] previous = new byte[HalfFloatPoint.BYTES];
      HalfFloatPoint.shortToSortableBytes((short) (i - 1), previous, 0);
      byte[] current = new byte[HalfFloatPoint.BYTES];
      HalfFloatPoint.shortToSortableBytes((short) i, current, 0);
      assertTrue(StringHelper.compare(HalfFloatPoint.BYTES, previous, 0, current, 0) < 0);
      assertEquals(i, HalfFloatPoint.sortableBytesToShort(current, 0));
    }
  }

  /** Add a single value and search for it */
  public void testBasics() throws Exception {
    Directory dir = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir);

    // add a doc with a single dimension
    Document document = new Document();
    document.add(new HalfFloatPoint("field", 1.25f));
    writer.addDocument(document);

    // search and verify we found our doc
    IndexReader reader = writer.getReader();
    IndexSearcher searcher = newSearcher(reader);
    assertEquals(1, searcher.count(HalfFloatPoint.newExactQuery("field", 1.25f)));
    assertEquals(0, searcher.count(HalfFloatPoint.newExactQuery("field", 1f)));
    assertEquals(0, searcher.count(HalfFloatPoint.newExactQuery("field", 2f)));
    assertEquals(1, searcher.count(HalfFloatPoint.newRangeQuery("field", 1f, 2f)));
    assertEquals(0, searcher.count(HalfFloatPoint.newRangeQuery("field", 0f, 1f)));
    assertEquals(0, searcher.count(HalfFloatPoint.newRangeQuery("field", 1.5f, 2f)));
    assertEquals(1, searcher.count(HalfFloatPoint.newSetQuery("field", 1.25f)));
    assertEquals(1, searcher.count(HalfFloatPoint.newSetQuery("field", 1f, 1.25f)));
    assertEquals(0, searcher.count(HalfFloatPoint.newSetQuery("field", 1f)));
    assertEquals(0, searcher.count(HalfFloatPoint.newSetQuery("field")));

    reader.close();
    writer.close();
    dir.close();
  }

  /** Add a single multi-dimensional value and search for it */
  public void testBasicsMultiDims() throws Exception {
    Directory dir = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir);

    // add a doc with two dimensions
    Document document = new Document();
    document.add(new HalfFloatPoint("field", 1.25f, -2f));
    writer.addDocument(document);

    // search and verify we found our doc
    IndexReader reader = writer.getReader();
    IndexSearcher searcher = newSearcher(reader);
    assertEquals(1, searcher.count(HalfFloatPoint.newRangeQuery("field",
        new float[]{0, -5}, new float[]{1.25f, -1})));
    assertEquals(0, searcher.count(HalfFloatPoint.newRangeQuery("field",
        new float[]{0, 0}, new float[]{2, 2})));
    assertEquals(0, searcher.count(HalfFloatPoint.newRangeQuery("field",
        new float[]{-10, -10}, new float[]{1, 2})));

    reader.close();
    writer.close();
    dir.close();
  }

  public void testNextUp() {
    assertEquals(Float.NaN, HalfFloatPoint.nextUp(Float.NaN), 0f);
    assertEquals(Float.POSITIVE_INFINITY, HalfFloatPoint.nextUp(Float.POSITIVE_INFINITY), 0f);
    assertEquals(-65504, HalfFloatPoint.nextUp(Float.NEGATIVE_INFINITY), 0f);
    assertEquals(HalfFloatPoint.shortBitsToHalfFloat((short) 0), HalfFloatPoint.nextUp(-0f), 0f);
    assertEquals(HalfFloatPoint.shortBitsToHalfFloat((short) 1), HalfFloatPoint.nextUp(0f), 0f);
    // values that cannot be exactly represented as a half float
    assertEquals(HalfFloatPoint.nextUp(0f), HalfFloatPoint.nextUp(Float.MIN_VALUE), 0f);
    assertEquals(Float.floatToIntBits(-0f), Float.floatToIntBits(HalfFloatPoint.nextUp(-Float.MIN_VALUE)));
  }

  public void testNextDown() {
    assertEquals(Float.NaN, HalfFloatPoint.nextDown(Float.NaN), 0f);
    assertEquals(Float.NEGATIVE_INFINITY, HalfFloatPoint.nextDown(Float.NEGATIVE_INFINITY), 0f);
    assertEquals(65504, HalfFloatPoint.nextDown(Float.POSITIVE_INFINITY), 0f);
    assertEquals(Float.floatToIntBits(-0f), Float.floatToIntBits(HalfFloatPoint.nextDown(0f)));
    // values that cannot be exactly represented as a half float
    assertEquals(Float.floatToIntBits(0f), Float.floatToIntBits(HalfFloatPoint.nextDown(Float.MIN_VALUE)));
    assertEquals(HalfFloatPoint.nextDown(-0f), HalfFloatPoint.nextDown(-Float.MIN_VALUE), 0f);
  }
}

solr/CHANGES.txt
@@ -142,6 +142,9 @@ New Features

* SOLR-8323, SOLR-9113: Add CollectionStateWatcher API (Alan Woodward, Scott Blum)

* SOLR-8988: Adds query option facet.distrib.mco which when set to true allows the use of facet.mincount=1 in cloud mode.
  (Keith Laban, Dennis Gove)

Bug Fixes
----------------------

@@ -229,6 +232,8 @@ Bug Fixes
* SOLR-8801: /bin/solr create script always returns exit code 0 when a collection/core already exists.
  (Khalid Alharbi, Marius Grama via Steve Rowe)

* SOLR-9134: Fix RestManager.addManagedResource return value. (Christine Poerschke)

Optimizations
----------------------
* SOLR-8722: Don't force a full ZkStateReader refresh on every Overseer operation.

FacetComponent.java (under solr/core/src/java/org/apache/solr)
@@ -563,9 +563,16 @@ public class FacetComponent extends SearchComponent {
        // set the initial limit higher to increase accuracy
        dff.initialLimit = doOverRequestMath(dff.initialLimit, dff.overrequestRatio,
                                             dff.overrequestCount);
        dff.initialMincount = 0; // TODO: we could change this to 1, but would
                                 // then need more refinement for small facet
                                 // result sets?

        // If option FACET_DISTRIB_MCO is turned on then we will use 1 as the initial
        // minCount (unless the user explicitly set it to something less than 1). If
        // option FACET_DISTRIB_MCO is turned off then we will use 0 as the initial
        // minCount regardless of what the user might have provided (prior to the
        // addition of the FACET_DISTRIB_MCO option the default logic was to use 0).
        // As described in issues SOLR-8559 and SOLR-8988 the use of 1 provides a
        // significant performance boost.
        dff.initialMincount = dff.mco ? Math.min(dff.minCount, 1) : 0;

      } else {
        // if limit==-1, then no need to artificially lower mincount to 0 if
        // it's 1

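To make the new first-phase behavior concrete, here is the decision with illustrative inputs (plain Java mirroring the ternary above; the request values are hypothetical):

    // hypothetical request: facet.mincount=2 with the new flag enabled
    int minCount = 2;
    boolean mco = true;
    int initialMincount = mco ? Math.min(minCount, 1) : 0; // 1 here; 0 whenever mco is false

With the flag on, shards can skip zero-count terms in the first phase while the final counts still honor the user's real mincount.
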
@@ -1415,6 +1422,7 @@ public class FacetComponent extends SearchComponent {

    public int initialLimit;     // how many terms requested in first phase
    public int initialMincount;  // mincount param sent to each shard
    public boolean mco;
    public double overrequestRatio;
    public int overrequestCount;
    public boolean needRefinements;

@@ -1433,7 +1441,9 @@ public class FacetComponent extends SearchComponent {
        = params.getFieldDouble(field, FacetParams.FACET_OVERREQUEST_RATIO, 1.5);
      this.overrequestCount
        = params.getFieldInt(field, FacetParams.FACET_OVERREQUEST_COUNT, 10);

      this.mco
        = params.getFieldBool(field, FacetParams.FACET_DISTRIB_MCO, false);
    }

    void add(int shardNum, NamedList shardCounts, int numRequested) {

RestManager.java (under solr/core/src/java/org/apache/solr)
@@ -647,11 +647,11 @@ public class RestManager {
   * Restlet router. Returns the corresponding instance.
   */
  public synchronized ManagedResource addManagedResource(String resourceId, Class<? extends ManagedResource> clazz) {
    ManagedResource res = null;
    ManagedResourceRegistration existingReg = registry.registered.get(resourceId);
    final ManagedResource res;
    final ManagedResourceRegistration existingReg = registry.registered.get(resourceId);
    if (existingReg == null) {
      registry.registerManagedResource(resourceId, clazz, null);
      addRegisteredResource(registry.registered.get(resourceId));
      res = addRegisteredResource(registry.registered.get(resourceId));
    } else {
      res = getManagedResource(resourceId);
    }

ConnectionReuseTest.java (under solr/core/src/test/org/apache/solr)
@@ -149,7 +149,7 @@ public class ConnectionReuseTest extends SolrCloudTestCase {
    // we try and make sure the connection we get has handled all of the requests in this test
    if (client instanceof ConcurrentUpdateSolrClient) {
      // we can't fully control queue polling breaking up requests - allow a bit of leeway
      int exp = cnt1 + queueBreaks + 2;
      int exp = queueBreaks + 3;
      assertTrue(
          "We expected all communication via streaming client to use one connection! expected=" + exp + " got="
              + metrics.getRequestCount(),

TestCloudPivotFacet.java (under solr/core/src/test/org/apache/solr)
@@ -53,6 +53,7 @@ import static org.apache.solr.common.params.FacetParams.FACET_OVERREQUEST_RATIO;
import static org.apache.solr.common.params.FacetParams.FACET_PIVOT;
import static org.apache.solr.common.params.FacetParams.FACET_PIVOT_MINCOUNT;
import static org.apache.solr.common.params.FacetParams.FACET_SORT;
import static org.apache.solr.common.params.FacetParams.FACET_DISTRIB_MCO;

/**
 * <p>

@@ -84,6 +85,8 @@ public class TestCloudPivotFacet extends AbstractFullDistribZkTestBase {
  // param used by test purely for tracing & validation
  private static String TRACE_MIN = "_test_min";
  // param used by test purely for tracing & validation
  private static String TRACE_DISTRIB_MIN = "_test_distrib_min";
  // param used by test purely for tracing & validation
  private static String TRACE_MISS = "_test_miss";
  // param used by test purely for tracing & validation
  private static String TRACE_SORT = "_test_sort";

@@ -190,6 +193,12 @@ public class TestCloudPivotFacet extends AbstractFullDistribZkTestBase {
      // trace param for validation
      baseP.add(TRACE_MIN, min);
    }

    if (random().nextBoolean()) {
      pivotP.add(FACET_DISTRIB_MCO, "true");
      // trace param for validation
      baseP.add(TRACE_DISTRIB_MIN, "true");
    }

    if (random().nextBoolean()) {
      String missing = ""+random().nextBoolean();

solr/solrj/src/java/org/apache/solr/common/params/FacetParams.java
@@ -122,6 +122,23 @@ public interface FacetParams {
  public static final String FACET_OVERREQUEST_COUNT = FACET_OVERREQUEST + ".count";


  public static final String FACET_DISTRIB = FACET + ".distrib";

  /**
   * If we are returning facet field counts, are sorting those facets by their count, and the minimum count to return is > 0,
   * then allow the use of facet.mincount = 1 in cloud mode. To enable this use facet.distrib.mco=true.
   *
   * i.e. If the following three conditions are met in cloud mode: facet.sort=count, facet.limit > 0, facet.mincount > 0.
   * Then use facet.mincount=1.
   *
   * Previously, and by default, facet.mincount is explicitly set to 0 in cloud mode for this condition.
   * In SOLR-8559 and SOLR-8988, a significant performance increase has been seen when enabling this optimization.
   *
   * Note: enabling this flag has no effect when the conditions above are not met. For those other cases the default behavior is sufficient.
   */
  public static final String FACET_DISTRIB_MCO = FACET_DISTRIB + ".mco";

  /**
   * Comma separated list of fields to pivot
   *

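A hedged SolrJ sketch of turning the new option on for a request (the field name is made up; ModifiableSolrParams and the FacetParams constants are the standard solrj classes):

    ModifiableSolrParams params = new ModifiableSolrParams();
    params.set(FacetParams.FACET, "true");
    params.set(FacetParams.FACET_FIELD, "category");
    params.set(FacetParams.FACET_MINCOUNT, "1");
    params.set(FacetParams.FACET_DISTRIB_MCO, "true"); // the flag introduced in this commit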