LUCENE-10530: Avoid floating point precision bug in TestTaxonomyFacetAssociations (#848)

This commit is contained in:
Greg Miller 2022-04-29 08:57:46 -07:00 committed by GitHub
parent 0dad9ddae8
commit 902a7df0e5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 45 additions and 7 deletions

View File

@ -80,7 +80,7 @@ New Features
to speed up computing the number of hits when possible. (Lu Xugang, Luca Cavanna, Adrien Grand)
* LUCENE-10422: Monitor Improvements: `Monitor` can use a custom `Directory`
implementation. `Monitor` can be created with a readonly `QueryIndex` in order to
implementation. `Monitor` can be created with a readonly `QueryIndex` in order to
have readonly `Monitor` instances. (Niko Usai)
* LUCENE-10456: Implement rewrite and Weight#count for MultiRangeQuery
@ -153,8 +153,11 @@ Bug Fixes
no documents instead of throwing an NPE. (Greg Miller)
* LUCENE-10470: Check if polygon has been successfully tessellated before we fail (we are failing some valid
tessellations) and allow filtering edges that fold on top of the previous one. (Ignacio Vera)
tessellations) and allow filtering edges that fold on top of the previous one. (Ignacio Vera)
* LUCENE-10530: Avoid a floating point precision bug in the TestTaxonomyFacetAssociations test case.
(Greg Miller)
Build
---------------------

View File

@ -31,11 +31,17 @@ import org.apache.lucene.facet.FacetsCollectorManager;
import org.apache.lucene.facet.FacetsConfig;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.tests.index.RandomIndexWriter;
import org.apache.lucene.util.BitUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.junit.AfterClass;
import org.junit.BeforeClass;
@ -115,7 +121,6 @@ public class TestTaxonomyFacetAssociations extends FacetTestCase {
}
if (random().nextBoolean()) { // maybe index a float association with the dim
float nextFloat = random().nextFloat() * 10000f;
randomFloatValues.computeIfAbsent(path, k -> new ArrayList<>()).add(nextFloat);
doc.add(new FloatAssociationFacetField(nextFloat, "float_random", path));
}
}
@ -129,7 +134,6 @@ public class TestTaxonomyFacetAssociations extends FacetTestCase {
}
if (random().nextBoolean()) {
float nextFloat = random().nextFloat() * 10000f;
randomFloatSingleValued.computeIfAbsent(path, k -> new ArrayList<>()).add(nextFloat);
doc.add(new FloatAssociationFacetField(nextFloat, "float_single_valued", path));
}
}
@ -141,6 +145,34 @@ public class TestTaxonomyFacetAssociations extends FacetTestCase {
reader = writer.getReader();
writer.close();
taxoReader = new DirectoryTaxonomyReader(taxoDir);
// To avoid floating point precision issues, it's useful to keep track of the values in the
// exact same order they appear when iterating the doc values in the index. This ensures we
// sum them in the same order when determining expected values for test cases and when the
// actual facets implementation sums them. See LUCENE-10530:
for (LeafReaderContext ctx : reader.leaves()) {
BinaryDocValues dv = DocValues.getBinary(ctx.reader(), "$facets.float");
for (int doc = dv.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = dv.nextDoc()) {
final BytesRef bytesRef = dv.binaryValue();
byte[] bytes = bytesRef.bytes;
int end = bytesRef.offset + bytesRef.length;
int offset = bytesRef.offset;
while (offset < end) {
int ord = (int) BitUtil.VH_BE_INT.get(bytes, offset);
offset += 4;
float value = (float) BitUtil.VH_BE_FLOAT.get(bytes, offset);
offset += 4;
FacetLabel label = taxoReader.getPath(ord);
String dim = label.components[0];
String child = label.components[1];
if ("float_random".equals(dim)) {
randomFloatValues.computeIfAbsent(child, k -> new ArrayList<>()).add(value);
} else if ("float_single_valued".equals(dim)) {
randomFloatSingleValued.computeIfAbsent(child, k -> new ArrayList<>()).add(value);
}
}
}
}
}
@AfterClass
@ -449,7 +481,10 @@ public class TestTaxonomyFacetAssociations extends FacetTestCase {
float aggregatedValue = 0f;
for (Map.Entry<String, Float> e : expected.entrySet()) {
float value = e.getValue();
assertEquals(value, facets.getSpecificValue(dim, e.getKey()).floatValue(), 1);
// We can expect the floats to be exactly equal here since we're ensuring that we sum them
// in the same order when determining expected values and when computing facets. See
// LUCENE-10530:
assertEquals(value, facets.getSpecificValue(dim, e.getKey()).floatValue(), 0f);
aggregatedValue = aggregationFunction.aggregate(aggregatedValue, value);
}
@ -465,7 +500,7 @@ public class TestTaxonomyFacetAssociations extends FacetTestCase {
assertNull(facetResult);
} else {
assertEquals(dim, facetResult.dim);
assertEquals(aggregatedValue, facetResult.value.floatValue(), 1);
assertEquals(aggregatedValue, facetResult.value.floatValue(), 1f);
assertEquals(expected.size(), facetResult.childCount);
}
}