mirror of https://github.com/apache/lucene.git

commit b8c0ff66f9

SOLR-445: Merge remote-tracking branch 'refs/remotes/origin/branch_6x' into branch_6x
(picking up mid-backport conflicts)
Lucene60PointsWriter.java
@@ -42,8 +42,8 @@ import org.apache.lucene.util.bkd.BKDWriter;
 /** Writes dimensional values */
 public class Lucene60PointsWriter extends PointsWriter implements Closeable {
 
-  final IndexOutput dataOut;
-  final Map<String,Long> indexFPs = new HashMap<>();
+  protected final IndexOutput dataOut;
+  protected final Map<String,Long> indexFPs = new HashMap<>();
   final SegmentWriteState writeState;
   final int maxPointsInLeafNode;
   final double maxMBSortInHeap;
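Note on this hunk: widening dataOut and indexFPs from package-private to protected is what lets the test codec further down in this commit subclass the writer from another package. A minimal sketch of what the change enables — the class name is hypothetical, not part of the commit:

```java
import java.io.IOException;
import org.apache.lucene.codecs.lucene60.Lucene60PointsWriter;
import org.apache.lucene.index.SegmentWriteState;

// Hypothetical subclass: before this commit, dataOut and indexFPs were
// package-private, so a writer subclass outside the lucene60 package
// could not write its own index and record per-field file pointers.
class CustomPointsWriter extends Lucene60PointsWriter {
  CustomPointsWriter(SegmentWriteState state, int maxPointsInLeafNode,
                     double maxMBSortInHeap) throws IOException {
    super(state, maxPointsInLeafNode, maxMBSortInHeap);
  }
  // An overriding writeField can now write to the inherited protected
  // dataOut and record offsets in the inherited protected indexFPs.
}
```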
BKDWriter.java
@@ -1033,8 +1033,7 @@ public class BKDWriter implements Closeable {
     return true;
   }
 
-  // TODO: make this protected when we want to subclass to play with different splitting criteria
-  private int split(byte[] minPackedValue, byte[] maxPackedValue) {
+  protected int split(byte[] minPackedValue, byte[] maxPackedValue) {
     // Find which dim has the largest span so we can split on it:
     int splitDim = -1;
     for(int dim=0;dim<numDims;dim++) {
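Promoting split to protected (and resolving the TODO) is the extension hook the rest of this commit relies on. As a sketch of the contract, a subclass may return any valid dimension index; this hypothetical writer always splits on dimension 0, while the RandomlySplittingBKDWriter added below picks one at random:

```java
import java.io.IOException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.bkd.BKDWriter;

// Hypothetical subclass illustrating the new hook: the return value is
// the dimension index to split on, and must lie in [0, numDims).
class FirstDimSplittingBKDWriter extends BKDWriter {
  FirstDimSplittingBKDWriter(int maxDoc, Directory tempDir, String tempFileNamePrefix,
                             int numDims, int bytesPerDim, int maxPointsInLeafNode,
                             double maxMBSortInHeap, long totalPointCount,
                             boolean singleValuePerDoc) throws IOException {
    super(maxDoc, tempDir, tempFileNamePrefix, numDims, bytesPerDim,
          maxPointsInLeafNode, maxMBSortInHeap, totalPointCount, singleValuePerDoc);
  }

  @Override
  protected int split(byte[] minPackedValue, byte[] maxPackedValue) {
    return 0; // always split the first dimension, ignoring the cell bounds
  }
}
```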
RandomCodec.java
@@ -52,8 +52,11 @@ import org.apache.lucene.codecs.memory.MemoryPostingsFormat;
 import org.apache.lucene.codecs.mockrandom.MockRandomPostingsFormat;
 import org.apache.lucene.codecs.simpletext.SimpleTextDocValuesFormat;
 import org.apache.lucene.codecs.simpletext.SimpleTextPostingsFormat;
+import org.apache.lucene.index.PointValues.IntersectVisitor;
+import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.TestUtil;
+import org.apache.lucene.util.bkd.BKDWriter;
 
 /**
  * Codec that assigns per-field random postings formats.
@@ -93,13 +96,55 @@ public class RandomCodec extends AssertingCodec {
   // TODO: improve how we randomize this...
   private final int maxPointsInLeafNode;
   private final double maxMBSortInHeap;
+  private final int bkdSplitRandomSeed;
 
   @Override
   public PointsFormat pointsFormat() {
     return new AssertingPointsFormat(new PointsFormat() {
       @Override
       public PointsWriter fieldsWriter(SegmentWriteState writeState) throws IOException {
-        return new Lucene60PointsWriter(writeState, maxPointsInLeafNode, maxMBSortInHeap);
+        // Randomize how BKDWriter chooses its splits:
+
+        return new Lucene60PointsWriter(writeState, maxPointsInLeafNode, maxMBSortInHeap) {
+          @Override
+          public void writeField(FieldInfo fieldInfo, PointsReader values) throws IOException {
+
+            boolean singleValuePerDoc = values.size(fieldInfo.name) == values.getDocCount(fieldInfo.name);
+
+            try (BKDWriter writer = new RandomlySplittingBKDWriter(writeState.segmentInfo.maxDoc(),
+                                                                   writeState.directory,
+                                                                   writeState.segmentInfo.name,
+                                                                   fieldInfo.getPointDimensionCount(),
+                                                                   fieldInfo.getPointNumBytes(),
+                                                                   maxPointsInLeafNode,
+                                                                   maxMBSortInHeap,
+                                                                   values.size(fieldInfo.name),
+                                                                   singleValuePerDoc,
+                                                                   bkdSplitRandomSeed ^ fieldInfo.name.hashCode())) {
+              values.intersect(fieldInfo.name, new IntersectVisitor() {
+                  @Override
+                  public void visit(int docID) {
+                    throw new IllegalStateException();
+                  }
+
+                  @Override
+                  public void visit(int docID, byte[] packedValue) throws IOException {
+                    writer.add(packedValue, docID);
+                  }
+
+                  @Override
+                  public PointValues.Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
+                    return PointValues.Relation.CELL_CROSSES_QUERY;
+                  }
+                });
+
+              // We could have 0 points on merge since all docs with dimensional fields may be deleted:
+              if (writer.getPointCount() > 0) {
+                indexFPs.put(fieldInfo.name, writer.finish(dataOut));
+              }
+            }
+          }
+        };
       }
 
       @Override
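The anonymous Lucene60PointsWriter above is why the earlier visibility changes were needed: writeField builds its own BKD tree and publishes it through the now-protected dataOut and indexFPs. A hedged sketch of how a test might drive this path end to end — the RandomCodec(Random, Set&lt;String&gt;) constructor shape is assumed from the surrounding test-framework code, and the field name is arbitrary:

```java
import java.util.Collections;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;

public class TestRandomizedSplits extends LuceneTestCase {
  public void testPointsGoThroughRandomSplitter() throws Exception {
    Directory dir = newDirectory();                 // LuceneTestCase helper
    IndexWriterConfig iwc = newIndexWriterConfig(); // MockAnalyzer-backed config
    // Assumed constructor shape: a test Random plus codec names to avoid.
    iwc.setCodec(new RandomCodec(random(), Collections.<String>emptySet()));
    try (IndexWriter w = new IndexWriter(dir, iwc)) {
      Document doc = new Document();
      doc.add(new IntPoint("pt", 17)); // every point add funnels into writeField
      w.addDocument(doc);
      w.forceMerge(1);                 // merges exercise the same code path
    }
    dir.close();
  }
}
```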
@@ -152,6 +197,7 @@ public class RandomCodec extends AssertingCodec {
 
     maxPointsInLeafNode = TestUtil.nextInt(random, 16, 2048);
     maxMBSortInHeap = 4.0 + (3*random.nextDouble());
+    bkdSplitRandomSeed = random.nextInt();
 
     add(avoidCodecs,
         TestUtil.getDefaultPostingsFormat(minItemsPerBlock, maxItemsPerBlock),
@@ -221,4 +267,24 @@ public class RandomCodec extends AssertingCodec {
         ", maxPointsInLeafNode=" + maxPointsInLeafNode +
         ", maxMBSortInHeap=" + maxMBSortInHeap;
   }
+
+  /** Just like {@link BKDWriter} except it evilly picks random ways to split cells on
+   *  recursion to try to provoke geo APIs that get upset at fun rectangles. */
+  private static class RandomlySplittingBKDWriter extends BKDWriter {
+
+    final Random random;
+
+    public RandomlySplittingBKDWriter(int maxDoc, Directory tempDir, String tempFileNamePrefix, int numDims,
+                                      int bytesPerDim, int maxPointsInLeafNode, double maxMBSortInHeap,
+                                      long totalPointCount, boolean singleValuePerDoc, int randomSeed) throws IOException {
+      super(maxDoc, tempDir, tempFileNamePrefix, numDims, bytesPerDim, maxPointsInLeafNode, maxMBSortInHeap, totalPointCount, singleValuePerDoc);
+      this.random = new Random(randomSeed);
+    }
+
+    @Override
+    protected int split(byte[] minPackedValue, byte[] maxPackedValue) {
+      // BKD normally defaults by the widest dimension, to try to make as squarish cells as possible, but we just pick a random one ;)
+      return random.nextInt(numDims);
+    }
+  }
 }
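A note on the seed wiring: the constructor receives bkdSplitRandomSeed ^ fieldInfo.name.hashCode(), so every field gets its own Random stream while staying deterministic for a given test seed. The derivation in isolation (values here are illustrative only):

```java
import java.util.Random;

public class SeedDerivationDemo {
  public static void main(String[] args) {
    int bkdSplitRandomSeed = 42;  // the codec draws this once via random.nextInt()
    String fieldName = "point";   // hypothetical field name
    int numDims = 2;              // hypothetical dimension count
    // Same codec seed + same field name => identical split sequence,
    // so a failing randomized run can be replayed exactly.
    Random r = new Random(bkdSplitRandomSeed ^ fieldName.hashCode());
    System.out.println("first split dimension: " + r.nextInt(numDims));
  }
}
```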
solr/CHANGES.txt
@@ -62,6 +62,9 @@ Bug Fixes
 * SOLR-8857: HdfsUpdateLog does not use configured or new default number of version buckets and is
   hard coded to 256. (Mark Miller, yonik, Gregory Chanan)
 
+* SOLR-8902: Make sure ReturnFields only returns the requested fields from (fl=) even when
+  DocumentTransformers ask for getExtraRequestFields() (ryan)
+
 Optimizations
 ----------------------
 * SOLR-8722: Don't force a full ZkStateReader refresh on every Overseer operation.
BinaryResponseWriter.java
@@ -72,8 +72,7 @@ public class BinaryResponseWriter implements BinaryQueryResponseWriter {
   public static class Resolver implements JavaBinCodec.ObjectResolver , JavaBinCodec.WritableDocFields {
     protected final SolrQueryRequest solrQueryRequest;
     protected IndexSchema schema;
-    protected SolrIndexSearcher searcher; // TODO - this is never set? always null?
-    protected final ReturnFields returnFields;
+    protected ReturnFields returnFields;
 
     public Resolver(SolrQueryRequest req, ReturnFields returnFields) {
       solrQueryRequest = req;
@@ -83,7 +82,13 @@ public class BinaryResponseWriter implements BinaryQueryResponseWriter {
     @Override
     public Object resolve(Object o, JavaBinCodec codec) throws IOException {
       if (o instanceof ResultContext) {
-        writeResults((ResultContext) o, codec);
+        ReturnFields orig = returnFields;
+        ResultContext res = (ResultContext)o;
+        if(res.getReturnFields()!=null) {
+          returnFields = res.getReturnFields();
+        }
+        writeResults(res, codec);
+        returnFields = orig;
         return null; // null means we completely handled it
       }
       if (o instanceof DocList) {
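One caveat on this hunk: returnFields is saved and restored around writeResults, but an exception thrown inside writeResults would leave the per-ResultContext value in place on the shared Resolver. A more defensive variant (a sketch, not what the commit does) restores it in a finally block:

```java
// Sketch only: same save-and-restore logic as above, with the restore
// moved into finally so a failing writeResults cannot leak the override.
if (o instanceof ResultContext) {
  ReturnFields orig = returnFields;
  try {
    ResultContext res = (ResultContext) o;
    if (res.getReturnFields() != null) {
      returnFields = res.getReturnFields();
    }
    writeResults(res, codec);
  } finally {
    returnFields = orig;
  }
  return null; // null means we completely handled it
}
```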
SolrReturnFields.java
@@ -51,6 +51,7 @@ public class SolrReturnFields extends ReturnFields {
   private final List<String> globs = new ArrayList<>(1);
 
   // The lucene field names to request from the SolrIndexSearcher
+  // This *may* include fields that will not be in the final response
   private final Set<String> fields = new HashSet<>();
 
   // Field names that are OK to include in the response.
@@ -129,16 +130,12 @@ public class SolrReturnFields extends ReturnFields {
       }
       augmenters.addTransformer( new RenameFieldTransformer( from, to, copy ) );
     }
-
-    if( !_wantsAllFields ) {
-      if( !globs.isEmpty() ) {
-        // TODO??? need to fill up the fields with matching field names in the index
-        // and add them to okFieldNames?
-        // maybe just get all fields?
-        // this would disable field selection optimization... i think thatis OK
-        fields.clear(); // this will get all fields, and use wantsField to limit
-      }
-      okFieldNames.addAll( fields );
+    if( !_wantsAllFields && !globs.isEmpty() ) {
+      // TODO??? need to fill up the fields with matching field names in the index
+      // and add them to okFieldNames?
+      // maybe just get all fields?
+      // this would disable field selection optimization... i think thatis OK
+      fields.clear(); // this will get all fields, and use wantsField to limit
     }
 
     if( augmenters.size() == 1 ) {
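The collapsed condition only fires when the request is not fl=* and at least one glob is present; in that case the concrete fields set is cleared and per-document filtering falls back to wantsField. A hedged illustration in the style of ReturnFieldsTest (req() is the SolrTestCaseJ4 helper; field names are hypothetical):

```java
// With a glob in fl, the concrete `fields` set is cleared and wantsField()
// does the filtering; without a glob, the named fields are kept as-is.
SolrReturnFields rf = new SolrReturnFields(req("fl", "id,price_*"));
assertFalse(rf.wantsAllFields());
assertTrue(rf.wantsField("id"));
assertTrue(rf.wantsField("price_usd")); // matched by the price_* glob
assertFalse(rf.wantsField("secret"));   // never requested
```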
ReturnFieldsTest.java
@@ -264,6 +264,14 @@ public class ReturnFieldsTest extends SolrTestCaseJ4 {
     assertFalse( rf.wantsField( "id" ) );
     assertFalse(rf.wantsAllFields());
     assertNull(rf.getTransformer());
+
+    // Don't return 'store_rpt' just because it is required by the transformer
+    rf = new SolrReturnFields( req("fl", "[geo f=store_rpt]") );
+    assertFalse( rf.wantsScore() );
+    assertTrue(rf.wantsField("[geo]"));
+    assertFalse( rf.wantsField( "store_rpt" ) );
+    assertFalse(rf.wantsAllFields());
+    assertNotNull(rf.getTransformer());
   }
 
   @Test