LUCENE-10530: Avoid floating point precision bug in TestTaxonomyFacetAssociations (#848)

This commit is contained in:
Greg Miller 2022-04-29 08:57:46 -07:00 committed by GitHub
parent 0dad9ddae8
commit 902a7df0e5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 45 additions and 7 deletions

View File

@ -155,6 +155,9 @@ Bug Fixes
* LUCENE-10470: Check if polygon has been successfully tessellated before we fail (we are failing some valid
tessellations) and allow filtering edges that fold on top of the previous one. (Ignacio Vera)
* LUCENE-10530: Avoid floating point precision test case bug in TestTaxonomyFacetAssociations.
(Greg Miller)
Build
---------------------

View File

@ -31,11 +31,17 @@ import org.apache.lucene.facet.FacetsCollectorManager;
import org.apache.lucene.facet.FacetsConfig;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.tests.index.RandomIndexWriter;
import org.apache.lucene.util.BitUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.junit.AfterClass;
import org.junit.BeforeClass;
@ -115,7 +121,6 @@ public class TestTaxonomyFacetAssociations extends FacetTestCase {
}
if (random().nextBoolean()) { // maybe index a float association with the dim
float nextFloat = random().nextFloat() * 10000f;
randomFloatValues.computeIfAbsent(path, k -> new ArrayList<>()).add(nextFloat);
doc.add(new FloatAssociationFacetField(nextFloat, "float_random", path));
}
}
@ -129,7 +134,6 @@ public class TestTaxonomyFacetAssociations extends FacetTestCase {
}
if (random().nextBoolean()) {
float nextFloat = random().nextFloat() * 10000f;
randomFloatSingleValued.computeIfAbsent(path, k -> new ArrayList<>()).add(nextFloat);
doc.add(new FloatAssociationFacetField(nextFloat, "float_single_valued", path));
}
}
@ -141,6 +145,34 @@ public class TestTaxonomyFacetAssociations extends FacetTestCase {
reader = writer.getReader();
writer.close();
taxoReader = new DirectoryTaxonomyReader(taxoDir);
// To avoid floating point precision issues, it's useful to keep track of the values in the
// exact same order they appear when iterating the doc values in the index. This ensures we
// sum them in the same order when determining expected values for tests cases and when the
// actual facets implementation sums them. See LUCENE-10530:
for (LeafReaderContext ctx : reader.leaves()) {
BinaryDocValues dv = DocValues.getBinary(ctx.reader(), "$facets.float");
for (int doc = dv.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = dv.nextDoc()) {
final BytesRef bytesRef = dv.binaryValue();
byte[] bytes = bytesRef.bytes;
int end = bytesRef.offset + bytesRef.length;
int offset = bytesRef.offset;
while (offset < end) {
int ord = (int) BitUtil.VH_BE_INT.get(bytes, offset);
offset += 4;
float value = (float) BitUtil.VH_BE_FLOAT.get(bytes, offset);
offset += 4;
FacetLabel label = taxoReader.getPath(ord);
String dim = label.components[0];
String child = label.components[1];
if ("float_random".equals(dim)) {
randomFloatValues.computeIfAbsent(child, k -> new ArrayList<>()).add(value);
} else if ("float_single_valued".equals(dim)) {
randomFloatSingleValued.computeIfAbsent(child, k -> new ArrayList<>()).add(value);
}
}
}
}
}
@AfterClass
@ -449,7 +481,10 @@ public class TestTaxonomyFacetAssociations extends FacetTestCase {
float aggregatedValue = 0f;
for (Map.Entry<String, Float> e : expected.entrySet()) {
float value = e.getValue();
assertEquals(value, facets.getSpecificValue(dim, e.getKey()).floatValue(), 1);
// We can expect the floats to be exactly equal here since we're ensuring that we sum them
// in the same order when determining expected values and when computing facets. See
// LUCENE-10530:
assertEquals(value, facets.getSpecificValue(dim, e.getKey()).floatValue(), 0f);
aggregatedValue = aggregationFunction.aggregate(aggregatedValue, value);
}
@ -465,7 +500,7 @@ public class TestTaxonomyFacetAssociations extends FacetTestCase {
assertNull(facetResult);
} else {
assertEquals(dim, facetResult.dim);
assertEquals(aggregatedValue, facetResult.value.floatValue(), 1);
assertEquals(aggregatedValue, facetResult.value.floatValue(), 1f);
assertEquals(expected.size(), facetResult.childCount);
}
}