Simplify LongHashSet by completely removing java.util.Set APIs (#12133)

This commit is contained in:
Uwe Schindler 2023-02-06 22:43:20 +01:00 committed by GitHub
parent 8564da434d
commit 57403e26e0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 19 additions and 41 deletions

View File

@ -127,7 +127,9 @@ Optimizations
in order to achieve the same false positive probability with less memory.
(Jean-François Boeuf)
* GITHUB#12118 Optimize FeatureQuery to TermQuery & weight when scoring is not required (Ben Trent, Robert Muir)
* GITHUB#12118: Optimize FeatureQuery to TermQuery & weight when scoring is not required. (Ben Trent, Robert Muir)
* GITHUB#12128, GITHUB#12133: Speed up docvalues set query by making use of sortedness. (Robert Muir, Uwe Schindler)
Bug Fixes
---------------------

View File

@ -17,9 +17,9 @@
package org.apache.lucene.document;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Objects;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.LongStream;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.packed.PackedInts;
@ -101,6 +101,15 @@ final class LongHashSet implements Accountable {
}
}
/**
 * Returns a stream of all values contained in this set. When {@code hasMissingValue} is set, the
 * {@code MISSING} sentinel is emitted first, followed by every table slot that is not {@code
 * MISSING}.
 */
LongStream stream() {
  final LongStream occupiedSlots = Arrays.stream(table).filter(slot -> slot != MISSING);
  // The sentinel is never stored in the table itself, so it has to be prepended explicitly.
  return hasMissingValue
      ? LongStream.concat(LongStream.of(MISSING), occupiedSlots)
      : occupiedSlots;
}
@Override
public int hashCode() {
return Objects.hash(size, minValue, maxValue, mask, hasMissingValue, Arrays.hashCode(table));
@ -122,23 +131,7 @@ final class LongHashSet implements Accountable {
/**
 * Renders the set as a bracketed, comma-separated list such as {@code [1, 2, 3]}. Values appear
 * in the order produced by {@code stream()}; an empty set renders as {@code []}.
 */
@Override
public String toString() {
  // Single implementation: the hand-rolled StringBuilder loop duplicated what stream() already
  // enumerates and left the joining-based return unreachable.
  return stream().mapToObj(String::valueOf).collect(Collectors.joining(", ", "[", "]"));
}
/** number of elements in the set */
@ -150,18 +143,4 @@ final class LongHashSet implements Accountable {
public long ramBytesUsed() {
return BASE_RAM_BYTES + RamUsageEstimator.sizeOfObject(table);
}
// for testing only: boxes the contents of this set into a java.util.Set
Set<Long> toSet() {
  final Set<Long> boxed = new HashSet<>();
  // The MISSING sentinel is tracked by a flag rather than a table slot, so add it first.
  if (hasMissingValue) {
    boxed.add(MISSING);
  }
  for (final long slot : table) {
    if (slot == MISSING) {
      // slot holds the sentinel, not a stored value
      continue;
    }
    boxed.add(slot);
  }
  return boxed;
}
}

View File

@ -26,7 +26,7 @@ import org.apache.lucene.tests.util.LuceneTestCase;
public class TestLongHashSet extends LuceneTestCase {
private void assertEquals(Set<Long> set1, LongHashSet longHashSet) {
Set<Long> set2 = longHashSet.toSet();
Set<Long> set2 = longHashSet.stream().boxed().collect(Collectors.toSet());
LuceneTestCase.assertEquals(set1, set2);
@ -44,13 +44,13 @@ public class TestLongHashSet extends LuceneTestCase {
}
/**
 * Asserts that the values held by {@code longHashSet} differ from {@code set1}, and that they
 * also differ from the values of a {@code LongHashSet} built from the sorted contents of {@code
 * set1}.
 */
private void assertNotEquals(Set<Long> set1, LongHashSet longHashSet) {
  // Box the primitive stream once; declaring set2 a second time would not compile.
  Set<Long> set2 = longHashSet.stream().boxed().collect(Collectors.toSet());
  LuceneTestCase.assertNotEquals(set1, set2);

  // The LongHashSet constructor is fed a sorted array here.
  LongHashSet set3 = new LongHashSet(set1.stream().mapToLong(Long::longValue).sorted().toArray());
  LuceneTestCase.assertNotEquals(set2, set3.stream().boxed().collect(Collectors.toSet()));
}
public void testEmpty() {
@ -103,10 +103,7 @@ public class TestLongHashSet extends LuceneTestCase {
if (values.length > 0 && random().nextBoolean()) {
values[values.length / 2] = Long.MIN_VALUE;
}
Set<Long> set1 =
LongStream.of(values)
.mapToObj(Long::valueOf)
.collect(Collectors.toCollection(HashSet::new));
Set<Long> set1 = LongStream.of(values).mapToObj(Long::valueOf).collect(Collectors.toSet());
Arrays.sort(values);
LongHashSet set2 = new LongHashSet(values);
assertEquals(set1, set2);