mirror of https://github.com/apache/lucene.git
LUCENE-5016: Sampling can break FacetResult labeling
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1487807 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
b4bf678713
commit
376b331651
|
@ -118,6 +118,11 @@ Bug Fixes
|
|||
for scoringQueries. Instead use QueryValueSource to safely wrap arbitrary
|
||||
queries and use them with CustomScoreQuery. (John Wang, Robert Muir)
|
||||
|
||||
* LUCENE-5016: SamplingAccumulator returned inconsistent label if asked to
|
||||
aggregate a non-existing category. Also fixed a bug in RangeAccumulator if
|
||||
some readers did not have the requested numeric DV field.
|
||||
(Rob Audenaerde, Shai Erera)
|
||||
|
||||
Optimizations
|
||||
|
||||
* LUCENE-4936: Improve numeric doc values compression in case all values share
|
||||
|
|
|
@ -64,7 +64,7 @@ public class RangeAccumulator extends FacetsAccumulator {
|
|||
throw new IllegalArgumentException("only flat (dimension only) CategoryPath is allowed");
|
||||
}
|
||||
|
||||
RangeFacetRequest<?> rfr = (RangeFacetRequest) fr;
|
||||
RangeFacetRequest<?> rfr = (RangeFacetRequest<?>) fr;
|
||||
|
||||
requests.add(new RangeSet(rfr.ranges, rfr.categoryPath.components[0]));
|
||||
}
|
||||
|
@ -86,8 +86,11 @@ public class RangeAccumulator extends FacetsAccumulator {
|
|||
RangeSet ranges = requests.get(i);
|
||||
|
||||
int[] counts = new int[ranges.ranges.length];
|
||||
for(MatchingDocs hits : matchingDocs) {
|
||||
for (MatchingDocs hits : matchingDocs) {
|
||||
NumericDocValues ndv = hits.context.reader().getNumericDocValues(ranges.field);
|
||||
if (ndv == null) {
|
||||
continue; // no numeric values for this field in this reader
|
||||
}
|
||||
final int length = hits.bits.length();
|
||||
int doc = 0;
|
||||
while (doc < length && (doc = hits.bits.nextSetBit(doc)) != -1) {
|
||||
|
|
|
@ -209,7 +209,7 @@ public abstract class Sampler {
|
|||
super(orig.categoryPath, num);
|
||||
this.orig = orig;
|
||||
setDepth(orig.getDepth());
|
||||
setNumLabel(orig.getNumLabel());
|
||||
setNumLabel(0); // don't label anything as we're over-sampling
|
||||
setResultMode(orig.getResultMode());
|
||||
setSortOrder(orig.getSortOrder());
|
||||
}
|
||||
|
|
|
@ -87,7 +87,7 @@ public class SamplingAccumulator extends StandardFacetsAccumulator {
|
|||
|
||||
List<FacetResult> sampleRes = super.accumulate(docids);
|
||||
|
||||
List<FacetResult> fixedRes = new ArrayList<FacetResult>();
|
||||
List<FacetResult> results = new ArrayList<FacetResult>();
|
||||
for (FacetResult fres : sampleRes) {
|
||||
// for sure fres is not null because this is guaranteed by the delegee.
|
||||
PartitionsFacetResultsHandler frh = createFacetResultsHandler(fres.getFacetRequest());
|
||||
|
@ -104,13 +104,18 @@ public class SamplingAccumulator extends StandardFacetsAccumulator {
|
|||
}
|
||||
|
||||
// final labeling if allowed (because labeling is a costly operation)
|
||||
frh.labelResult(fres);
|
||||
fixedRes.add(fres); // add to final results
|
||||
if (fres.getFacetResultNode().ordinal == TaxonomyReader.INVALID_ORDINAL) {
|
||||
// category does not exist, add an empty result
|
||||
results.add(emptyResult(fres.getFacetResultNode().ordinal, fres.getFacetRequest()));
|
||||
} else {
|
||||
frh.labelResult(fres);
|
||||
results.add(fres);
|
||||
}
|
||||
}
|
||||
|
||||
searchParams = original; // Back to original params
|
||||
|
||||
return fixedRes;
|
||||
return results;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -10,6 +10,7 @@ import org.apache.lucene.facet.sampling.Sampler.SampleResult;
|
|||
import org.apache.lucene.facet.search.FacetResult;
|
||||
import org.apache.lucene.facet.search.ScoredDocIDs;
|
||||
import org.apache.lucene.facet.search.StandardFacetsAccumulator;
|
||||
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
|
@ -62,7 +63,7 @@ public class SamplingWrapper extends StandardFacetsAccumulator {
|
|||
|
||||
List<FacetResult> sampleRes = delegee.accumulate(sampleSet.docids);
|
||||
|
||||
List<FacetResult> fixedRes = new ArrayList<FacetResult>();
|
||||
List<FacetResult> results = new ArrayList<FacetResult>();
|
||||
SampleFixer sampleFixer = sampler.samplingParams.getSampleFixer();
|
||||
|
||||
for (FacetResult fres : sampleRes) {
|
||||
|
@ -80,15 +81,20 @@ public class SamplingWrapper extends StandardFacetsAccumulator {
|
|||
}
|
||||
|
||||
// final labeling if allowed (because labeling is a costly operation)
|
||||
frh.labelResult(fres);
|
||||
fixedRes.add(fres); // add to final results
|
||||
if (fres.getFacetResultNode().ordinal == TaxonomyReader.INVALID_ORDINAL) {
|
||||
// category does not exist, add an empty result
|
||||
results.add(emptyResult(fres.getFacetResultNode().ordinal, fres.getFacetRequest()));
|
||||
} else {
|
||||
frh.labelResult(fres);
|
||||
results.add(fres);
|
||||
}
|
||||
}
|
||||
|
||||
if (shouldOversample) {
|
||||
delegee.searchParams = original; // Back to original params
|
||||
}
|
||||
|
||||
return fixedRes;
|
||||
return results;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -17,8 +17,20 @@ import org.apache.lucene.facet.params.CategoryListParams;
|
|||
import org.apache.lucene.facet.params.FacetIndexingParams;
|
||||
import org.apache.lucene.facet.params.FacetSearchParams;
|
||||
import org.apache.lucene.facet.params.PerDimensionIndexingParams;
|
||||
import org.apache.lucene.facet.range.LongRange;
|
||||
import org.apache.lucene.facet.range.RangeAccumulator;
|
||||
import org.apache.lucene.facet.range.RangeFacetRequest;
|
||||
import org.apache.lucene.facet.sampling.RandomSampler;
|
||||
import org.apache.lucene.facet.sampling.Sampler;
|
||||
import org.apache.lucene.facet.sampling.SamplingAccumulator;
|
||||
import org.apache.lucene.facet.sampling.SamplingParams;
|
||||
import org.apache.lucene.facet.sampling.SamplingWrapper;
|
||||
import org.apache.lucene.facet.sampling.TakmiSampleFixer;
|
||||
import org.apache.lucene.facet.search.FacetRequest.ResultMode;
|
||||
import org.apache.lucene.facet.sortedset.SortedSetDocValuesAccumulator;
|
||||
import org.apache.lucene.facet.sortedset.SortedSetDocValuesReaderState;
|
||||
import org.apache.lucene.facet.taxonomy.CategoryPath;
|
||||
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
|
||||
import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
|
||||
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
|
||||
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
|
||||
|
@ -384,5 +396,72 @@ public class TestFacetsCollector extends FacetTestCase {
|
|||
|
||||
IOUtils.close(taxo, taxoDir, r, indexDir);
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testLabeling() throws Exception {
|
||||
Directory indexDir = newDirectory(), taxoDir = newDirectory();
|
||||
|
||||
// create the index
|
||||
IndexWriter indexWriter = new IndexWriter(indexDir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
|
||||
DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
|
||||
FacetFields facetFields = new FacetFields(taxoWriter);
|
||||
Document doc = new Document();
|
||||
facetFields.addFields(doc, Arrays.asList(new CategoryPath("A/1", '/')));
|
||||
indexWriter.addDocument(doc);
|
||||
IOUtils.close(indexWriter, taxoWriter);
|
||||
|
||||
DirectoryReader indexReader = DirectoryReader.open(indexDir);
|
||||
TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);
|
||||
IndexSearcher searcher = new IndexSearcher(indexReader);
|
||||
// ask to count a non-existing category to test labeling
|
||||
FacetSearchParams fsp = new FacetSearchParams(new CountFacetRequest(new CategoryPath("B"), 5));
|
||||
|
||||
final SamplingParams sampleParams = new SamplingParams();
|
||||
sampleParams.setMaxSampleSize(100);
|
||||
sampleParams.setMinSampleSize(100);
|
||||
sampleParams.setSamplingThreshold(100);
|
||||
sampleParams.setOversampleFactor(1.0d);
|
||||
if (random().nextBoolean()) {
|
||||
sampleParams.setSampleFixer(new TakmiSampleFixer(indexReader, taxoReader, fsp));
|
||||
}
|
||||
final Sampler sampler = new RandomSampler(sampleParams, random());
|
||||
|
||||
FacetsAccumulator[] accumulators = new FacetsAccumulator[] {
|
||||
new FacetsAccumulator(fsp, indexReader, taxoReader),
|
||||
new StandardFacetsAccumulator(fsp, indexReader, taxoReader),
|
||||
new SamplingAccumulator(sampler, fsp, indexReader, taxoReader),
|
||||
new AdaptiveFacetsAccumulator(fsp, indexReader, taxoReader),
|
||||
new SamplingWrapper(new StandardFacetsAccumulator(fsp, indexReader, taxoReader), sampler)
|
||||
};
|
||||
|
||||
for (FacetsAccumulator fa : accumulators) {
|
||||
FacetsCollector fc = FacetsCollector.create(fa);
|
||||
searcher.search(new MatchAllDocsQuery(), fc);
|
||||
List<FacetResult> facetResults = fc.getFacetResults();
|
||||
assertNotNull(facetResults);
|
||||
assertEquals("incorrect label returned for " + fa, fsp.facetRequests.get(0).categoryPath, facetResults.get(0).getFacetResultNode().label);
|
||||
}
|
||||
|
||||
try {
|
||||
// SortedSetDocValuesAccumulator cannot even be created in such state
|
||||
assertNull(new SortedSetDocValuesAccumulator(fsp, new SortedSetDocValuesReaderState(indexReader)));
|
||||
// if this ever changes, make sure FacetResultNode is labeled correctly
|
||||
fail("should not have succeeded to execute a request over a category which wasn't indexed as SortedSetDVField");
|
||||
} catch (IllegalArgumentException e) {
|
||||
// expected
|
||||
}
|
||||
|
||||
fsp = new FacetSearchParams(new RangeFacetRequest<LongRange>("f", new LongRange("grr", 0, true, 1, true)));
|
||||
RangeAccumulator ra = new RangeAccumulator(fsp, indexReader);
|
||||
FacetsCollector fc = FacetsCollector.create(ra);
|
||||
searcher.search(new MatchAllDocsQuery(), fc);
|
||||
List<FacetResult> facetResults = fc.getFacetResults();
|
||||
assertNotNull(facetResults);
|
||||
assertEquals("incorrect label returned for RangeAccumulator", fsp.facetRequests.get(0).categoryPath, facetResults.get(0).getFacetResultNode().label);
|
||||
|
||||
IOUtils.close(indexReader, taxoReader);
|
||||
|
||||
IOUtils.close(indexDir, taxoDir);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue