mirror of https://github.com/apache/lucene.git

commit b8c0ff66f9

SOLR-445: Merge remote-tracking branch 'refs/remotes/origin/branch_6x' into branch_6x
(picking up mid-backport conflicts)
Lucene60PointsWriter.java
@@ -42,8 +42,8 @@ import org.apache.lucene.util.bkd.BKDWriter;
 /** Writes dimensional values */
 public class Lucene60PointsWriter extends PointsWriter implements Closeable {
 
-  final IndexOutput dataOut;
-  final Map<String,Long> indexFPs = new HashMap<>();
+  protected final IndexOutput dataOut;
+  protected final Map<String,Long> indexFPs = new HashMap<>();
   final SegmentWriteState writeState;
   final int maxPointsInLeafNode;
   final double maxMBSortInHeap;
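Note on this hunk: widening dataOut and indexFPs from package-private to protected is what lets the test codec further down in this commit subclass the writer from another package. A minimal sketch of what the change enables — the class name is hypothetical, not part of the commit:

```java
import java.io.IOException;
import org.apache.lucene.codecs.lucene60.Lucene60PointsWriter;
import org.apache.lucene.index.SegmentWriteState;

// Hypothetical subclass: before this commit, dataOut and indexFPs were
// package-private, so a writer subclass outside the lucene60 package
// could not write its own index and record per-field file pointers.
class CustomPointsWriter extends Lucene60PointsWriter {
  CustomPointsWriter(SegmentWriteState state, int maxPointsInLeafNode,
                     double maxMBSortInHeap) throws IOException {
    super(state, maxPointsInLeafNode, maxMBSortInHeap);
  }
  // An overriding writeField can now write to the inherited protected
  // dataOut and record offsets in the inherited protected indexFPs.
}
```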
BKDWriter.java
@@ -1033,8 +1033,7 @@ public class BKDWriter implements Closeable {
     return true;
   }
 
-  // TODO: make this protected when we want to subclass to play with different splitting criteria
-  private int split(byte[] minPackedValue, byte[] maxPackedValue) {
+  protected int split(byte[] minPackedValue, byte[] maxPackedValue) {
     // Find which dim has the largest span so we can split on it:
     int splitDim = -1;
     for(int dim=0;dim<numDims;dim++) {
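Promoting split to protected (and resolving the TODO) is the extension hook the rest of this commit relies on. As a sketch of the contract, a subclass may return any valid dimension index; this hypothetical writer always splits on dimension 0, while the RandomlySplittingBKDWriter added below picks one at random:

```java
import java.io.IOException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.bkd.BKDWriter;

// Hypothetical subclass illustrating the new hook: the return value is
// the dimension index to split on, and must lie in [0, numDims).
class FirstDimSplittingBKDWriter extends BKDWriter {
  FirstDimSplittingBKDWriter(int maxDoc, Directory tempDir, String tempFileNamePrefix,
                             int numDims, int bytesPerDim, int maxPointsInLeafNode,
                             double maxMBSortInHeap, long totalPointCount,
                             boolean singleValuePerDoc) throws IOException {
    super(maxDoc, tempDir, tempFileNamePrefix, numDims, bytesPerDim,
          maxPointsInLeafNode, maxMBSortInHeap, totalPointCount, singleValuePerDoc);
  }

  @Override
  protected int split(byte[] minPackedValue, byte[] maxPackedValue) {
    return 0; // always split the first dimension, ignoring the cell bounds
  }
}
```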
RandomCodec.java
@@ -52,8 +52,11 @@ import org.apache.lucene.codecs.memory.MemoryPostingsFormat;
 import org.apache.lucene.codecs.mockrandom.MockRandomPostingsFormat;
 import org.apache.lucene.codecs.simpletext.SimpleTextDocValuesFormat;
 import org.apache.lucene.codecs.simpletext.SimpleTextPostingsFormat;
+import org.apache.lucene.index.PointValues.IntersectVisitor;
+import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.TestUtil;
+import org.apache.lucene.util.bkd.BKDWriter;
 
 /**
  * Codec that assigns per-field random postings formats.
@@ -93,13 +96,55 @@ public class RandomCodec extends AssertingCodec {
   // TODO: improve how we randomize this...
   private final int maxPointsInLeafNode;
   private final double maxMBSortInHeap;
+  private final int bkdSplitRandomSeed;
 
   @Override
   public PointsFormat pointsFormat() {
     return new AssertingPointsFormat(new PointsFormat() {
       @Override
       public PointsWriter fieldsWriter(SegmentWriteState writeState) throws IOException {
-        return new Lucene60PointsWriter(writeState, maxPointsInLeafNode, maxMBSortInHeap);
+        // Randomize how BKDWriter chooses its splits:
+
+        return new Lucene60PointsWriter(writeState, maxPointsInLeafNode, maxMBSortInHeap) {
+          @Override
+          public void writeField(FieldInfo fieldInfo, PointsReader values) throws IOException {
+
+            boolean singleValuePerDoc = values.size(fieldInfo.name) == values.getDocCount(fieldInfo.name);
+
+            try (BKDWriter writer = new RandomlySplittingBKDWriter(writeState.segmentInfo.maxDoc(),
+                                                                   writeState.directory,
+                                                                   writeState.segmentInfo.name,
+                                                                   fieldInfo.getPointDimensionCount(),
+                                                                   fieldInfo.getPointNumBytes(),
+                                                                   maxPointsInLeafNode,
+                                                                   maxMBSortInHeap,
+                                                                   values.size(fieldInfo.name),
+                                                                   singleValuePerDoc,
+                                                                   bkdSplitRandomSeed ^ fieldInfo.name.hashCode())) {
+              values.intersect(fieldInfo.name, new IntersectVisitor() {
+                  @Override
+                  public void visit(int docID) {
+                    throw new IllegalStateException();
+                  }
+
+                  @Override
+                  public void visit(int docID, byte[] packedValue) throws IOException {
+                    writer.add(packedValue, docID);
+                  }
+
+                  @Override
+                  public PointValues.Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
+                    return PointValues.Relation.CELL_CROSSES_QUERY;
+                  }
+                });
+
+              // We could have 0 points on merge since all docs with dimensional fields may be deleted:
+              if (writer.getPointCount() > 0) {
+                indexFPs.put(fieldInfo.name, writer.finish(dataOut));
+              }
+            }
+          }
+        };
       }
 
       @Override
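The anonymous Lucene60PointsWriter above is why the earlier visibility changes were needed: writeField builds its own BKD tree and publishes it through the now-protected dataOut and indexFPs. A hedged sketch of how a test might drive this path end to end — the RandomCodec(Random, Set&lt;String&gt;) constructor shape is assumed from the surrounding test-framework code, and the field name is arbitrary:

```java
import java.util.Collections;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;

public class TestRandomizedSplits extends LuceneTestCase {
  public void testPointsGoThroughRandomSplitter() throws Exception {
    Directory dir = newDirectory();                 // LuceneTestCase helper
    IndexWriterConfig iwc = newIndexWriterConfig(); // MockAnalyzer-backed config
    // Assumed constructor shape: a test Random plus codec names to avoid.
    iwc.setCodec(new RandomCodec(random(), Collections.<String>emptySet()));
    try (IndexWriter w = new IndexWriter(dir, iwc)) {
      Document doc = new Document();
      doc.add(new IntPoint("pt", 17)); // every point add funnels into writeField
      w.addDocument(doc);
      w.forceMerge(1);                 // merges exercise the same code path
    }
    dir.close();
  }
}
```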
@@ -152,6 +197,7 @@ public class RandomCodec extends AssertingCodec {
 
     maxPointsInLeafNode = TestUtil.nextInt(random, 16, 2048);
     maxMBSortInHeap = 4.0 + (3*random.nextDouble());
+    bkdSplitRandomSeed = random.nextInt();
 
     add(avoidCodecs,
         TestUtil.getDefaultPostingsFormat(minItemsPerBlock, maxItemsPerBlock),
@@ -221,4 +267,24 @@ public class RandomCodec extends AssertingCodec {
         ", maxPointsInLeafNode=" + maxPointsInLeafNode +
         ", maxMBSortInHeap=" + maxMBSortInHeap;
   }
+
+  /** Just like {@link BKDWriter} except it evilly picks random ways to split cells on
+   *  recursion to try to provoke geo APIs that get upset at fun rectangles. */
+  private static class RandomlySplittingBKDWriter extends BKDWriter {
+
+    final Random random;
+
+    public RandomlySplittingBKDWriter(int maxDoc, Directory tempDir, String tempFileNamePrefix, int numDims,
+                                      int bytesPerDim, int maxPointsInLeafNode, double maxMBSortInHeap,
+                                      long totalPointCount, boolean singleValuePerDoc, int randomSeed) throws IOException {
+      super(maxDoc, tempDir, tempFileNamePrefix, numDims, bytesPerDim, maxPointsInLeafNode, maxMBSortInHeap, totalPointCount, singleValuePerDoc);
+      this.random = new Random(randomSeed);
+    }
+
+    @Override
+    protected int split(byte[] minPackedValue, byte[] maxPackedValue) {
+      // BKD normally defaults by the widest dimension, to try to make as squarish cells as possible, but we just pick a random one ;)
+      return random.nextInt(numDims);
+    }
+  }
 }
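A note on the seed wiring: the constructor receives bkdSplitRandomSeed ^ fieldInfo.name.hashCode(), so every field gets its own Random stream while staying deterministic for a given test seed. The derivation in isolation (values here are illustrative only):

```java
import java.util.Random;

public class SeedDerivationDemo {
  public static void main(String[] args) {
    int bkdSplitRandomSeed = 42;  // the codec draws this once via random.nextInt()
    String fieldName = "point";   // hypothetical field name
    int numDims = 2;              // hypothetical dimension count
    // Same codec seed + same field name => identical split sequence,
    // so a failing randomized run can be replayed exactly.
    Random r = new Random(bkdSplitRandomSeed ^ fieldName.hashCode());
    System.out.println("first split dimension: " + r.nextInt(numDims));
  }
}
```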
solr/CHANGES.txt
@@ -62,6 +62,9 @@ Bug Fixes
 * SOLR-8857: HdfsUpdateLog does not use configured or new default number of version buckets and is
   hard coded to 256. (Mark Miller, yonik, Gregory Chanan)
 
+* SOLR-8902: Make sure ReturnFields only returns the requested fields from (fl=) even when
+  DocumentTransformers ask for getExtraRequestFields() (ryan)
+
 Optimizations
 ----------------------
 * SOLR-8722: Don't force a full ZkStateReader refresh on every Overseer operation.
BinaryResponseWriter.java
@@ -72,8 +72,7 @@ public class BinaryResponseWriter implements BinaryQueryResponseWriter {
   public static class Resolver implements JavaBinCodec.ObjectResolver , JavaBinCodec.WritableDocFields {
     protected final SolrQueryRequest solrQueryRequest;
     protected IndexSchema schema;
-    protected SolrIndexSearcher searcher; // TODO - this is never set? always null?
-    protected final ReturnFields returnFields;
+    protected ReturnFields returnFields;
 
     public Resolver(SolrQueryRequest req, ReturnFields returnFields) {
       solrQueryRequest = req;
@@ -83,7 +82,13 @@ public class BinaryResponseWriter implements BinaryQueryResponseWriter {
     @Override
     public Object resolve(Object o, JavaBinCodec codec) throws IOException {
       if (o instanceof ResultContext) {
-        writeResults((ResultContext) o, codec);
+        ReturnFields orig = returnFields;
+        ResultContext res = (ResultContext)o;
+        if(res.getReturnFields()!=null) {
+          returnFields = res.getReturnFields();
+        }
+        writeResults(res, codec);
+        returnFields = orig;
         return null; // null means we completely handled it
       }
       if (o instanceof DocList) {
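One caveat on this hunk: returnFields is saved and restored around writeResults, but an exception thrown inside writeResults would leave the per-ResultContext value in place on the shared Resolver. A more defensive variant (a sketch, not what the commit does) restores it in a finally block:

```java
// Sketch only: same save-and-restore logic as above, with the restore
// moved into finally so a failing writeResults cannot leak the override.
if (o instanceof ResultContext) {
  ReturnFields orig = returnFields;
  try {
    ResultContext res = (ResultContext) o;
    if (res.getReturnFields() != null) {
      returnFields = res.getReturnFields();
    }
    writeResults(res, codec);
  } finally {
    returnFields = orig;
  }
  return null; // null means we completely handled it
}
```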
SolrReturnFields.java
@@ -51,6 +51,7 @@ public class SolrReturnFields extends ReturnFields {
   private final List<String> globs = new ArrayList<>(1);
 
   // The lucene field names to request from the SolrIndexSearcher
+  // This *may* include fields that will not be in the final response
   private final Set<String> fields = new HashSet<>();
 
   // Field names that are OK to include in the response.
@@ -129,16 +130,12 @@ public class SolrReturnFields extends ReturnFields {
       }
       augmenters.addTransformer( new RenameFieldTransformer( from, to, copy ) );
     }
-
-    if( !_wantsAllFields ) {
-      if( !globs.isEmpty() ) {
-        // TODO??? need to fill up the fields with matching field names in the index
-        // and add them to okFieldNames?
-        // maybe just get all fields?
-        // this would disable field selection optimization... i think thatis OK
-        fields.clear(); // this will get all fields, and use wantsField to limit
-      }
-      okFieldNames.addAll( fields );
+    if( !_wantsAllFields && !globs.isEmpty() ) {
+      // TODO??? need to fill up the fields with matching field names in the index
+      // and add them to okFieldNames?
+      // maybe just get all fields?
+      // this would disable field selection optimization... i think thatis OK
+      fields.clear(); // this will get all fields, and use wantsField to limit
     }
 
     if( augmenters.size() == 1 ) {
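The collapsed condition only fires when the request is not fl=* and at least one glob is present; in that case the concrete fields set is cleared and per-document filtering falls back to wantsField. A hedged illustration in the style of ReturnFieldsTest (req() is the SolrTestCaseJ4 helper; field names are hypothetical):

```java
// With a glob in fl, the concrete `fields` set is cleared and wantsField()
// does the filtering; without a glob, the named fields are kept as-is.
SolrReturnFields rf = new SolrReturnFields(req("fl", "id,price_*"));
assertFalse(rf.wantsAllFields());
assertTrue(rf.wantsField("id"));
assertTrue(rf.wantsField("price_usd")); // matched by the price_* glob
assertFalse(rf.wantsField("secret"));   // never requested
```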
ReturnFieldsTest.java
@@ -264,6 +264,14 @@ public class ReturnFieldsTest extends SolrTestCaseJ4 {
     assertFalse( rf.wantsField( "id" ) );
     assertFalse(rf.wantsAllFields());
     assertNull(rf.getTransformer());
+
+    // Don't return 'store_rpt' just because it is required by the transformer
+    rf = new SolrReturnFields( req("fl", "[geo f=store_rpt]") );
+    assertFalse( rf.wantsScore() );
+    assertTrue(rf.wantsField("[geo]"));
+    assertFalse( rf.wantsField( "store_rpt" ) );
+    assertFalse(rf.wantsAllFields());
+    assertNotNull(rf.getTransformer());
   }
 
   @Test