SOLR-445: Merge remote-tracking branch 'refs/remotes/origin/branch_6x' into branch_6x

(picking up mid backport conflicts)
Chris Hostetter 2016-03-25 14:07:06 -07:00
commit b8c0ff66f9
7 changed files with 96 additions and 18 deletions

Lucene60PointsWriter.java

@@ -42,8 +42,8 @@ import org.apache.lucene.util.bkd.BKDWriter;
 /** Writes dimensional values */
 public class Lucene60PointsWriter extends PointsWriter implements Closeable {
 
-  final IndexOutput dataOut;
-  final Map<String,Long> indexFPs = new HashMap<>();
+  protected final IndexOutput dataOut;
+  protected final Map<String,Long> indexFPs = new HashMap<>();
   final SegmentWriteState writeState;
   final int maxPointsInLeafNode;
   final double maxMBSortInHeap;

BKDWriter.java

@@ -1033,8 +1033,7 @@ public class BKDWriter implements Closeable {
     return true;
   }
 
-  // TODO: make this protected when we want to subclass to play with different splitting criteria
-  private int split(byte[] minPackedValue, byte[] maxPackedValue) {
+  protected int split(byte[] minPackedValue, byte[] maxPackedValue) {
     // Find which dim has the largest span so we can split on it:
     int splitDim = -1;
     for(int dim=0;dim<numDims;dim++) {
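To make the new extension point concrete, here is a minimal sketch (not part of this commit; the class name is hypothetical, and the constructor simply mirrors the BKDWriter super() call that appears later in this diff) of a subclass that overrides split() with its own criterion:

import java.io.IOException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.bkd.BKDWriter;

// Hypothetical illustration only: a BKDWriter subclass that ignores value spans
// and always splits on the first dimension, possible now that split() is protected.
class FirstDimSplittingBKDWriter extends BKDWriter {

  public FirstDimSplittingBKDWriter(int maxDoc, Directory tempDir, String tempFileNamePrefix,
                                    int numDims, int bytesPerDim, int maxPointsInLeafNode,
                                    double maxMBSortInHeap, long totalPointCount,
                                    boolean singleValuePerDoc) throws IOException {
    super(maxDoc, tempDir, tempFileNamePrefix, numDims, bytesPerDim,
          maxPointsInLeafNode, maxMBSortInHeap, totalPointCount, singleValuePerDoc);
  }

  @Override
  protected int split(byte[] minPackedValue, byte[] maxPackedValue) {
    return 0; // always split on dimension 0 instead of the widest dimension
  }
}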

RandomCodec.java

@@ -52,8 +52,11 @@ import org.apache.lucene.codecs.memory.MemoryPostingsFormat;
 import org.apache.lucene.codecs.mockrandom.MockRandomPostingsFormat;
 import org.apache.lucene.codecs.simpletext.SimpleTextDocValuesFormat;
 import org.apache.lucene.codecs.simpletext.SimpleTextPostingsFormat;
+import org.apache.lucene.index.PointValues.IntersectVisitor;
+import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.TestUtil;
+import org.apache.lucene.util.bkd.BKDWriter;
 
 /**
  * Codec that assigns per-field random postings formats.
@@ -93,13 +96,55 @@ public class RandomCodec extends AssertingCodec {
   // TODO: improve how we randomize this...
   private final int maxPointsInLeafNode;
   private final double maxMBSortInHeap;
+  private final int bkdSplitRandomSeed;
 
   @Override
   public PointsFormat pointsFormat() {
     return new AssertingPointsFormat(new PointsFormat() {
       @Override
       public PointsWriter fieldsWriter(SegmentWriteState writeState) throws IOException {
-        return new Lucene60PointsWriter(writeState, maxPointsInLeafNode, maxMBSortInHeap);
+
+        // Randomize how BKDWriter chooses its splits:
+        return new Lucene60PointsWriter(writeState, maxPointsInLeafNode, maxMBSortInHeap) {
+          @Override
+          public void writeField(FieldInfo fieldInfo, PointsReader values) throws IOException {
+
+            boolean singleValuePerDoc = values.size(fieldInfo.name) == values.getDocCount(fieldInfo.name);
+
+            try (BKDWriter writer = new RandomlySplittingBKDWriter(writeState.segmentInfo.maxDoc(),
+                                                                   writeState.directory,
+                                                                   writeState.segmentInfo.name,
+                                                                   fieldInfo.getPointDimensionCount(),
+                                                                   fieldInfo.getPointNumBytes(),
+                                                                   maxPointsInLeafNode,
+                                                                   maxMBSortInHeap,
+                                                                   values.size(fieldInfo.name),
+                                                                   singleValuePerDoc,
+                                                                   bkdSplitRandomSeed ^ fieldInfo.name.hashCode())) {
+              values.intersect(fieldInfo.name, new IntersectVisitor() {
+                  @Override
+                  public void visit(int docID) {
+                    throw new IllegalStateException();
+                  }
+
+                  @Override
+                  public void visit(int docID, byte[] packedValue) throws IOException {
+                    writer.add(packedValue, docID);
+                  }
+
+                  @Override
+                  public PointValues.Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
+                    return PointValues.Relation.CELL_CROSSES_QUERY;
+                  }
+                });
+
+              // We could have 0 points on merge since all docs with dimensional fields may be deleted:
+              if (writer.getPointCount() > 0) {
+                indexFPs.put(fieldInfo.name, writer.finish(dataOut));
+              }
+            }
+          }
+        };
       }
 
       @Override
@@ -152,6 +197,7 @@ public class RandomCodec extends AssertingCodec {
     maxPointsInLeafNode = TestUtil.nextInt(random, 16, 2048);
     maxMBSortInHeap = 4.0 + (3*random.nextDouble());
+    bkdSplitRandomSeed = random.nextInt();
 
     add(avoidCodecs,
         TestUtil.getDefaultPostingsFormat(minItemsPerBlock, maxItemsPerBlock),
@@ -221,4 +267,24 @@ public class RandomCodec extends AssertingCodec {
            ", maxPointsInLeafNode=" + maxPointsInLeafNode +
            ", maxMBSortInHeap=" + maxMBSortInHeap;
   }
+
+  /** Just like {@link BKDWriter} except it evilly picks random ways to split cells on
+   *  recursion to try to provoke geo APIs that get upset at fun rectangles. */
+  private static class RandomlySplittingBKDWriter extends BKDWriter {
+
+    final Random random;
+
+    public RandomlySplittingBKDWriter(int maxDoc, Directory tempDir, String tempFileNamePrefix, int numDims,
+                                      int bytesPerDim, int maxPointsInLeafNode, double maxMBSortInHeap,
+                                      long totalPointCount, boolean singleValuePerDoc, int randomSeed) throws IOException {
+      super(maxDoc, tempDir, tempFileNamePrefix, numDims, bytesPerDim, maxPointsInLeafNode, maxMBSortInHeap, totalPointCount, singleValuePerDoc);
+      this.random = new Random(randomSeed);
+    }
+
+    @Override
+    protected int split(byte[] minPackedValue, byte[] maxPackedValue) {
+      // BKD normally splits on the widest dimension, to try to make as squarish cells as possible, but we just pick a random one ;)
+      return random.nextInt(numDims);
+    }
+  }
 }

solr/CHANGES.txt

@@ -62,6 +62,9 @@ Bug Fixes
 * SOLR-8857: HdfsUpdateLog does not use configured or new default number of version buckets and is
   hard coded to 256. (Mark Miller, yonik, Gregory Chanan)
 
+* SOLR-8902: Make sure ReturnFields only returns the requested fields from (fl=) even when
+  DocumentTransformers ask for getExtraRequestFields() (ryan)
+
 Optimizations
 ----------------------
 * SOLR-8722: Don't force a full ZkStateReader refresh on every Overseer operation.
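As an end-to-end illustration of that fix, the following hedged SolrJ sketch (not part of this commit; the base URL, the core name "places", and the spatial field "store_rpt" are assumptions) requests only id and the [geo] pseudo-field, so store_rpt is fetched for the transformer but should not appear in the returned documents:

import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.common.SolrDocument;

public class GeoTransformerFlCheck {
  public static void main(String[] args) throws Exception {
    try (SolrClient client = new HttpSolrClient("http://localhost:8983/solr/places")) {
      SolrQuery query = new SolrQuery("*:*");
      // [geo] asks for store_rpt via getExtraRequestFields(), but fl only names id and [geo]:
      query.setFields("id", "[geo f=store_rpt]");
      for (SolrDocument doc : client.query(query).getResults()) {
        // After SOLR-8902 the raw store_rpt value is used by the transformer but not returned:
        System.out.println(doc.getFieldNames() + " contains store_rpt? " + doc.containsKey("store_rpt"));
      }
    }
  }
}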

BinaryResponseWriter.java

@@ -72,8 +72,7 @@ public class BinaryResponseWriter implements BinaryQueryResponseWriter {
   public static class Resolver implements JavaBinCodec.ObjectResolver , JavaBinCodec.WritableDocFields {
     protected final SolrQueryRequest solrQueryRequest;
     protected IndexSchema schema;
-    protected SolrIndexSearcher searcher; // TODO - this is never set? always null?
-    protected final ReturnFields returnFields;
+    protected ReturnFields returnFields;
 
     public Resolver(SolrQueryRequest req, ReturnFields returnFields) {
       solrQueryRequest = req;
@@ -83,7 +82,13 @@ public class BinaryResponseWriter implements BinaryQueryResponseWriter {
     @Override
     public Object resolve(Object o, JavaBinCodec codec) throws IOException {
       if (o instanceof ResultContext) {
-        writeResults((ResultContext) o, codec);
+        ReturnFields orig = returnFields;
+        ResultContext res = (ResultContext)o;
+        if(res.getReturnFields()!=null) {
+          returnFields = res.getReturnFields();
+        }
+        writeResults(res, codec);
+        returnFields = orig;
         return null; // null means we completely handled it
       }
       if (o instanceof DocList) {

SolrReturnFields.java

@@ -51,6 +51,7 @@ public class SolrReturnFields extends ReturnFields {
   private final List<String> globs = new ArrayList<>(1);
 
   // The lucene field names to request from the SolrIndexSearcher
+  // This *may* include fields that will not be in the final response
   private final Set<String> fields = new HashSet<>();
 
   // Field names that are OK to include in the response.
@@ -129,16 +130,12 @@ public class SolrReturnFields extends ReturnFields {
       }
       augmenters.addTransformer( new RenameFieldTransformer( from, to, copy ) );
     }
 
-    if( !_wantsAllFields ) {
-      if( !globs.isEmpty() ) {
-        // TODO??? need to fill up the fields with matching field names in the index
-        // and add them to okFieldNames?
-        // maybe just get all fields?
-        // this would disable field selection optimization... i think that is OK
-        fields.clear(); // this will get all fields, and use wantsField to limit
-      }
-      okFieldNames.addAll( fields );
+    if( !_wantsAllFields && !globs.isEmpty() ) {
+      // TODO??? need to fill up the fields with matching field names in the index
+      // and add them to okFieldNames?
+      // maybe just get all fields?
+      // this would disable field selection optimization... i think that is OK
+      fields.clear(); // this will get all fields, and use wantsField to limit
     }
 
     if( augmenters.size() == 1 ) {

ReturnFieldsTest.java

@@ -264,6 +264,14 @@ public class ReturnFieldsTest extends SolrTestCaseJ4 {
     assertFalse( rf.wantsField( "id" ) );
     assertFalse(rf.wantsAllFields());
     assertNull(rf.getTransformer());
+
+    // Don't return 'store_rpt' just because it is required by the transformer
+    rf = new SolrReturnFields( req("fl", "[geo f=store_rpt]") );
+    assertFalse( rf.wantsScore() );
+    assertTrue(rf.wantsField("[geo]"));
+    assertFalse( rf.wantsField( "store_rpt" ) );
+    assertFalse(rf.wantsAllFields());
+    assertNotNull(rf.getTransformer());
   }
 
   @Test