commit 33f6da286e
merge trunk (1364720-1364799)
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/pforcodec_3892@1364800 13f79535-47bb-0310-9956-ffa450edef68
@@ -896,7 +896,7 @@ public class BlockTreeTermsWriter extends FieldsConsumer {
       //   w.close();
       // }
     } else {
-      assert sumTotalTermFreq == 0;
+      assert sumTotalTermFreq == 0 || fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY && sumTotalTermFreq == -1;
       assert sumDocFreq == 0;
       assert docCount == 0;
     }
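
The relaxed assert above encodes the convention this commit threads through the whole postings-writing chain: for fields indexed with IndexOptions.DOCS_ONLY, term-frequency statistics are now reported as the sentinel -1 instead of 0. A condensed sketch of the invariant (hypothetical code, not part of the commit):

    // Sketch: a field that omits term frequencies (DOCS_ONLY) reports its
    // aggregate totalTermFreq as -1; any other field reports a real total.
    assert fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY || sumTotalTermFreq == -1;
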
@@ -49,14 +49,17 @@ import org.apache.lucene.util.FixedBitSet;
  */
 public abstract class PostingsConsumer {
 
-  /** Adds a new doc in this term. */
+  /** Adds a new doc in this term.
+   * <code>freq</code> will be -1 when term frequencies are omitted
+   * for the field. */
   public abstract void startDoc(int docID, int freq) throws IOException;
 
   /** Add a new position & payload, and start/end offset.  A
    * null payload means no payload; a non-null payload with
    * zero length also means no payload.  Caller may reuse
    * the {@link BytesRef} for the payload between calls
-   * (method must fully consume the payload). */
+   * (method must fully consume the payload). <code>startOffset</code>
+   * and <code>endOffset</code> will be -1 when offsets are not indexed. */
   public abstract void addPosition(int position, BytesRef payload, int startOffset, int endOffset) throws IOException;
 
   /** Called when we are done adding positions & payloads
@@ -78,7 +81,7 @@ public abstract class PostingsConsumer {
         break;
       }
       visitedDocs.set(doc);
-      this.startDoc(doc, 0);
+      this.startDoc(doc, -1);
       this.finishDoc();
       df++;
     }
@@ -146,6 +149,6 @@ public abstract class PostingsConsumer {
         df++;
       }
     }
-    return new TermStats(df, totTF);
+    return new TermStats(df, indexOptions == IndexOptions.DOCS_ONLY ? -1 : totTF);
   }
 }
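
To make the updated startDoc contract concrete, here is a minimal hypothetical PostingsConsumer (not part of the commit) that honors the new -1 sentinels; it implements only the three abstract methods shown in this file:

    import java.io.IOException;
    import org.apache.lucene.codecs.PostingsConsumer;
    import org.apache.lucene.util.BytesRef;

    // Counts postings while respecting the new contract: freq == -1 means the
    // field omits term frequencies (DOCS_ONLY), so each doc counts as a single
    // posting and no positions will ever be fed.
    class CountingPostingsConsumer extends PostingsConsumer {
      long postingCount;

      @Override
      public void startDoc(int docID, int freq) throws IOException {
        postingCount += (freq == -1) ? 1 : freq;
      }

      @Override
      public void addPosition(int position, BytesRef payload, int startOffset, int endOffset) throws IOException {
        // startOffset/endOffset are -1 when offsets are not indexed, per the
        // updated javadoc above; a real consumer must not treat -1 as a value.
      }

      @Override
      public void finishDoc() throws IOException {}
    }
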
@@ -57,10 +57,14 @@ public abstract class TermsConsumer {
    *  no docs. */
   public abstract PostingsConsumer startTerm(BytesRef text) throws IOException;
 
-  /** Finishes the current term; numDocs must be > 0. */
+  /** Finishes the current term; numDocs must be > 0.
+   * <code>stats.totalTermFreq</code> will be -1 when term
+   * frequencies are omitted for the field. */
   public abstract void finishTerm(BytesRef text, TermStats stats) throws IOException;
 
-  /** Called when we are done adding terms to this field */
+  /** Called when we are done adding terms to this field.
+   * <code>sumTotalTermFreq</code> will be -1 when term
+   * frequencies are omitted for the field. */
   public abstract void finish(long sumTotalTermFreq, long sumDocFreq, int docCount) throws IOException;
 
   /** Return the BytesRef Comparator used to sort terms
@@ -205,6 +209,6 @@ public abstract class TermsConsumer {
         }
       }
     }
-    finish(sumTotalTermFreq, sumDocFreq, visitedDocs.cardinality());
+    finish(indexOptions == IndexOptions.DOCS_ONLY ? -1 : sumTotalTermFreq, sumDocFreq, visitedDocs.cardinality());
   }
 }
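
A caller feeding a TermsConsumer must apply the same sentinel when it builds TermStats and the field totals. A hypothetical helper illustrating the pattern (the FreqProxTermsWriterPerField hunks below do the equivalent inline; assumes the imports already used in this file plus TermStats and FieldInfo.IndexOptions):

    // hasFreqs is derived from the field's IndexOptions; when frequencies are
    // omitted, both the per-term and per-field totals are reported as -1.
    void finishField(TermsConsumer tc, IndexOptions opts, BytesRef lastTerm,
                     int docFreq, long totalTermFreq,
                     long sumTotalTermFreq, long sumDocFreq, int docCount) throws IOException {
      boolean hasFreqs = opts.compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
      tc.finishTerm(lastTerm, new TermStats(docFreq, hasFreqs ? totalTermFreq : -1));
      tc.finish(hasFreqs ? sumTotalTermFreq : -1, sumDocFreq, docCount);
    }
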
@@ -430,7 +430,7 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
         if (readTermFreq) {
           termDocFreq = postings.docFreqs[termID];
         } else {
-          termDocFreq = 0;
+          termDocFreq = -1;
         }
         postings.lastDocCodes[termID] = -1;
       } else {
@@ -441,7 +441,7 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
           final int code = freq.readVInt();
           if (!readTermFreq) {
             docID += code;
-            termDocFreq = 0;
+            termDocFreq = -1;
           } else {
             docID += code >>> 1;
             if ((code & 1) != 0) {
@@ -469,7 +469,7 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
         //    2nd sweep does the real flush, but I suspect
         //    that'd add too much time to flush.
         visitedDocs.set(docID);
-        postingsConsumer.startDoc(docID, termDocFreq);
+        postingsConsumer.startDoc(docID, writeTermFreq ? termDocFreq : -1);
         if (docID < delDocLimit) {
           // Mark it deleted.  TODO: we could also skip
           // writing its postings; this would be
@@ -542,11 +542,11 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
         }
         postingsConsumer.finishDoc();
       }
-      termsConsumer.finishTerm(text, new TermStats(numDocs, totTF));
+      termsConsumer.finishTerm(text, new TermStats(numDocs, writeTermFreq ? totTF : -1));
       sumTotalTermFreq += totTF;
       sumDocFreq += numDocs;
     }
 
-    termsConsumer.finish(sumTotalTermFreq, sumDocFreq, visitedDocs.cardinality());
+    termsConsumer.finish(writeTermFreq ? sumTotalTermFreq : -1, sumDocFreq, visitedDocs.cardinality());
   }
 }
@@ -116,7 +116,7 @@ public class TestCodecs extends LuceneTestCase {
         sumDF += term.docs.length;
         sumTotalTermCount += term.write(termsConsumer);
       }
-      termsConsumer.finish(sumTotalTermCount, sumDF, (int) visitedDocs.cardinality());
+      termsConsumer.finish(omitTF ? -1 : sumTotalTermCount, sumDF, (int) visitedDocs.cardinality());
     }
   }
 
@@ -154,7 +154,7 @@ public class TestCodecs extends LuceneTestCase {
       for(int i=0;i<docs.length;i++) {
         final int termDocFreq;
         if (field.omitTF) {
-          termDocFreq = 0;
+          termDocFreq = -1;
         } else {
           termDocFreq = positions[i].length;
         }
@@ -165,10 +165,10 @@ public class TestCodecs extends LuceneTestCase {
             final PositionData pos = positions[i][j];
             postingsConsumer.addPosition(pos.pos, pos.payload, -1, -1);
           }
         }
         postingsConsumer.finishDoc();
       }
-      termsConsumer.finishTerm(text, new TermStats(docs.length, totTF));
+      termsConsumer.finishTerm(text, new TermStats(docs.length, field.omitTF ? -1 : totTF));
       return totTF;
     }
   }
@@ -406,7 +406,7 @@ public class TestPostingsFormat extends LuceneTestCase {
         if (VERBOSE) {
           System.out.println("      " + docCount + ": docID=" + posting.docID + " freq=" + posting.positions.size());
         }
-        postingsConsumer.startDoc(posting.docID, posting.positions.size());
+        postingsConsumer.startDoc(posting.docID, doFreq ? posting.positions.size() : -1);
         seenDocs.set(posting.docID);
         if (doPos) {
           totalTF += posting.positions.size();
@@ -428,12 +428,12 @@ public class TestPostingsFormat extends LuceneTestCase {
         postingsConsumer.finishDoc();
         docCount++;
       }
-      termsConsumer.finishTerm(term, new TermStats(postings.size(), totalTF));
+      termsConsumer.finishTerm(term, new TermStats(postings.size(), doFreq ? totalTF : -1));
       sumTotalTF += totalTF;
       sumDF += postings.size();
     }
 
-    termsConsumer.finish(sumTotalTF, sumDF, seenDocs.cardinality());
+    termsConsumer.finish(doFreq ? sumTotalTF : -1, sumDF, seenDocs.cardinality());
   }
 
   fieldsConsumer.close();
@@ -28,10 +28,26 @@ import org.apache.lucene.search.Query;
 import org.apache.lucene.spatial.query.SpatialArgs;
 
 /**
- * The SpatialStrategy encapsulates an approach to indexing and searching based on shapes.
+ * The SpatialStrategy encapsulates an approach to indexing and searching based
+ * on shapes.
  * <p/>
- * Note that a SpatialStrategy is not involved with the Lucene stored field values of shapes, which is
- * immaterial to indexing & search.
+ * Different implementations will support different features. A strategy should
+ * document these common elements:
+ * <ul>
+ *   <li>Can it index more than one shape per field?</li>
+ *   <li>What types of shapes can be indexed?</li>
+ *   <li>What types of query shapes can be used?</li>
+ *   <li>What types of query operations are supported?
+ *   This might vary per shape.</li>
+ *   <li>Are there caches?  Under what circumstances are they used?
+ *   Roughly how big are they? Is it segmented by Lucene segments, such as is
+ *   done by the Lucene {@link org.apache.lucene.search.FieldCache} and
+ *   {@link org.apache.lucene.index.DocValues} (ideal) or is it for the entire
+ *   index?</li>
+ * </ul>
+ * <p/>
+ * Note that a SpatialStrategy is not involved with the Lucene stored field
+ * values of shapes, which is immaterial to indexing & search.
  * <p/>
 * Thread-safe.
 *
@@ -16,8 +16,49 @@
 -->
 <html>
 <head>
-  <title>Apache Lucene Spatial Strategies</title>
+  <title>Apache Lucene Spatial Module</title>
 </head>
 <body>
 
+  <h1>The Spatial Module for Apache Lucene</h1>
+
+  <p>
+    The spatial module is new to Lucene 4, replacing the old contrib module
+    that came before it. The principal interface to the module is
+    a {@link org.apache.lucene.spatial.SpatialStrategy}
+    which encapsulates an approach to indexing and searching
+    based on shapes. Different Strategies have different features and
+    performance profiles, which are documented at each Strategy class level.
+  </p>
+  <p>
+    For some sample code showing how to use the API, see SpatialExample.java in
+    the tests.
+  </p>
+  <p>
+    The spatial module uses
+    <a href="https://github.com/spatial4j/spatial4j">Spatial4j</a>
+    heavily. Spatial4j is an ASL licensed library with these capabilities:
+    <ul>
+      <li>Provides shape implementations, namely point, rectangle,
+      and circle. Both geospatial contexts and plain 2D Euclidean/Cartesian contexts
+      are supported.
+      With an additional dependency, it adds polygon and other geometry shape
+      support via integration with
+      <a href="http://sourceforge.net/projects/jts-topo-suite/">JTS Topology Suite</a>.
+      This includes dateline wrap support.</li>
+      <li>Shape parsing and serialization, including
+      <a href="http://en.wikipedia.org/wiki/Well-known_text">Well-Known Text (WKT)</a>
+      (via JTS).</li>
+      <li>Distance and other spatial related math calculations.</li>
+    </ul>
+  </p>
+  <p>
+    Historical note: The new spatial module was once known as
+    Lucene Spatial Playground (LSP) as an external project. In ~March 2012, LSP
+    split into this new module as part of Lucene and Spatial4j externally. A
+    large chunk of the LSP implementation originated as SOLR-2155 which uses
+    trie/prefix-tree algorithms with a geohash encoding.
+  </p>
+
 </body>
 </html>
@@ -0,0 +1,180 @@
+package org.apache.lucene.spatial;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import com.spatial4j.core.context.SpatialContext;
+import com.spatial4j.core.context.simple.SimpleSpatialContext;
+import com.spatial4j.core.shape.Shape;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.IntField;
+import org.apache.lucene.document.StoredField;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.IndexableField;
+import org.apache.lucene.queries.function.ValueSource;
+import org.apache.lucene.search.Filter;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.SortField;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.spatial.prefix.RecursivePrefixTreeStrategy;
+import org.apache.lucene.spatial.prefix.tree.GeohashPrefixTree;
+import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree;
+import org.apache.lucene.spatial.query.SpatialArgs;
+import org.apache.lucene.spatial.query.SpatialArgsParser;
+import org.apache.lucene.spatial.query.SpatialOperation;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.Version;
+
+import java.io.IOException;
+
+/**
+ * This class serves as example code to show how to use the Lucene spatial
+ * module.
+ */
+public class SpatialExample extends LuceneTestCase {
+
+  public static void main(String[] args) throws IOException {
+    new SpatialExample().test();
+  }
+
+  public void test() throws IOException {
+    init();
+    indexPoints();
+    search();
+  }
+
+  /**
+   * The Spatial4j {@link SpatialContext} is a sort of global-ish singleton
+   * needed by Lucene spatial.  It's a facade to the rest of Spatial4j, acting
+   * as a factory for {@link Shape}s and provides access to reading and writing
+   * them from Strings.
+   */
+  private SpatialContext ctx;//"ctx" is the conventional variable name
+
+  /**
+   * The Lucene spatial {@link SpatialStrategy} encapsulates an approach to
+   * indexing and searching shapes, and providing relevancy scores for them.
+   * It's a simple API to unify different approaches.
+   * <p />
+   * Note that these are initialized with a field name.
+   */
+  private SpatialStrategy strategy;
+
+  private Directory directory;
+
+  protected void init() {
+    //Typical geospatial context with kilometer units.
+    //  These can also be constructed from a factory: SpatialContextFactory
+    this.ctx = SimpleSpatialContext.GEO_KM;
+
+    int maxLevels = 10;//results in sub-meter precision for geohash
+    //TODO demo lookup by detail distance
+    //  This can also be constructed from a factory: SpatialPrefixTreeFactory
+    SpatialPrefixTree grid = new GeohashPrefixTree(ctx, maxLevels);
+
+    this.strategy = new RecursivePrefixTreeStrategy(grid, "myGeoField");
+
+    this.directory = new RAMDirectory();
+  }
+
+  private void indexPoints() throws IOException {
+    IndexWriterConfig iwConfig = new IndexWriterConfig(TEST_VERSION_CURRENT,null);
+    IndexWriter indexWriter = new IndexWriter(directory, iwConfig);
+
+    //Spatial4j is x-y order for arguments
+    indexWriter.addDocument(newSampleDocument(
+        2, ctx.makePoint(-80.93, 33.77)));
+
+    //When parsing a string to a shape, the presence of a comma means it's y-x
+    //  order (lon, lat)
+    indexWriter.addDocument(newSampleDocument(
+        4, ctx.readShape("-50.7693246, 60.9289094")));
+
+    indexWriter.addDocument(newSampleDocument(
+        20, ctx.makePoint(0.1,0.1), ctx.makePoint(0, 0)));
+
+    indexWriter.close();
+  }
+
+  private Document newSampleDocument(int id, Shape... shapes) {
+    Document doc = new Document();
+    doc.add(new IntField("id", id, Field.Store.YES));
+    //Potentially more than one shape in this field is supported by some
+    //  strategies; see the javadocs of the SpatialStrategy impl to see.
+    for (Shape shape : shapes) {
+      for (IndexableField f : strategy.createIndexableFields(shape)) {
+        doc.add(f);
+      }
+      //store it too; the format is up to you
+      doc.add(new StoredField(strategy.getFieldName(), ctx.toString(shape)));
+    }
+
+    return doc;
+  }
+
+  private void search() throws IOException {
+    IndexReader indexReader = DirectoryReader.open(directory);
+    IndexSearcher indexSearcher = new IndexSearcher(indexReader);
+    Sort idSort = new Sort(new SortField("id", SortField.Type.INT));
+
+    //--Filter by circle (<= distance from a point)
+    {
+      //Search with circle
+      //note: SpatialArgs can be parsed from a string
+      SpatialArgs args = new SpatialArgs(SpatialOperation.Intersects,
+          ctx.makeCircle(-80.0, 33.0, 200));//200km (since km == ctx.getDistanceUnits
+      Filter filter = strategy.makeFilter(args);
+      TopDocs docs = indexSearcher.search(new MatchAllDocsQuery(), filter, 10, idSort);
+      assertDocMatchedIds(indexSearcher, docs, 2);
+    }
+    //--Match all, order by distance
+    {
+      SpatialArgs args = new SpatialArgs(SpatialOperation.Intersects,//doesn't matter
+          ctx.makePoint(60, -50));
+      ValueSource valueSource = strategy.makeValueSource(args);//the distance
+      Sort reverseDistSort = new Sort(valueSource.getSortField(false)).rewrite(indexSearcher);//true=asc dist
+      TopDocs docs = indexSearcher.search(new MatchAllDocsQuery(), 10, reverseDistSort);
+      assertDocMatchedIds(indexSearcher, docs, 4, 20, 2);
+    }
+    //demo arg parsing
+    {
+      SpatialArgs args = new SpatialArgs(SpatialOperation.Intersects,
+          ctx.makeCircle(-80.0, 33.0, 200));
+      SpatialArgs args2 = new SpatialArgsParser().parse("Intersects(Circle(33,-80 d=200))", ctx);
+      assertEquals(args.toString(),args2.toString());
+    }
+
+    indexReader.close();
+  }
+
+  private void assertDocMatchedIds(IndexSearcher indexSearcher, TopDocs docs, int... ids) throws IOException {
+    int[] gotIds = new int[docs.totalHits];
+    for (int i = 0; i < gotIds.length; i++) {
+      gotIds[i] = indexSearcher.doc(docs.scoreDocs[i].doc).getField("id").numericValue().intValue();
+    }
+    assertArrayEquals(ids,gotIds);
+  }
+
+}
@@ -18,16 +18,24 @@ package org.apache.lucene.codecs.asserting;
  */
 
 import java.io.IOException;
+import java.util.Comparator;
 
 import org.apache.lucene.codecs.FieldsConsumer;
 import org.apache.lucene.codecs.FieldsProducer;
+import org.apache.lucene.codecs.PostingsConsumer;
 import org.apache.lucene.codecs.PostingsFormat;
+import org.apache.lucene.codecs.TermStats;
+import org.apache.lucene.codecs.TermsConsumer;
 import org.apache.lucene.codecs.lucene40.Lucene40PostingsFormat;
 import org.apache.lucene.index.AssertingAtomicReader;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.FieldInfo.IndexOptions;
 import org.apache.lucene.index.FieldsEnum;
 import org.apache.lucene.index.SegmentReadState;
 import org.apache.lucene.index.SegmentWriteState;
 import org.apache.lucene.index.Terms;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.OpenBitSet;
 
 /**
  * Just like {@link Lucene40PostingsFormat} but with additional asserts.
@@ -39,10 +47,9 @@ public class AssertingPostingsFormat extends PostingsFormat {
     super("Asserting");
   }
 
-  // TODO: we could add some useful checks here?
   @Override
   public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
-    return in.fieldsConsumer(state);
+    return new AssertingFieldsConsumer(in.fieldsConsumer(state));
   }
 
   @Override
@@ -85,4 +92,164 @@ public class AssertingPostingsFormat extends PostingsFormat {
       return in.getUniqueTermCount();
     }
   }
+
+  static class AssertingFieldsConsumer extends FieldsConsumer {
+    private final FieldsConsumer in;
+
+    AssertingFieldsConsumer(FieldsConsumer in) {
+      this.in = in;
+    }
+
+    @Override
+    public TermsConsumer addField(FieldInfo field) throws IOException {
+      TermsConsumer consumer = in.addField(field);
+      assert consumer != null;
+      return new AssertingTermsConsumer(consumer, field);
+    }
+
+    @Override
+    public void close() throws IOException {
+      in.close();
+    }
+  }
+
+  static enum TermsConsumerState { INITIAL, START, FINISHED };
+  static class AssertingTermsConsumer extends TermsConsumer {
+    private final TermsConsumer in;
+    private final FieldInfo fieldInfo;
+    private BytesRef lastTerm = null;
+    private TermsConsumerState state = TermsConsumerState.INITIAL;
+    private AssertingPostingsConsumer lastPostingsConsumer = null;
+    private long sumTotalTermFreq = 0;
+    private long sumDocFreq = 0;
+    private OpenBitSet visitedDocs = new OpenBitSet();
+
+    AssertingTermsConsumer(TermsConsumer in, FieldInfo fieldInfo) {
+      this.in = in;
+      this.fieldInfo = fieldInfo;
+    }
+
+    @Override
+    public PostingsConsumer startTerm(BytesRef text) throws IOException {
+      assert state == TermsConsumerState.INITIAL || state == TermsConsumerState.START && lastPostingsConsumer.docFreq == 0;
+      state = TermsConsumerState.START;
+      assert lastTerm == null || in.getComparator().compare(text, lastTerm) > 0;
+      lastTerm = BytesRef.deepCopyOf(text);
+      return lastPostingsConsumer = new AssertingPostingsConsumer(in.startTerm(text), fieldInfo, visitedDocs);
+    }
+
+    @Override
+    public void finishTerm(BytesRef text, TermStats stats) throws IOException {
+      assert state == TermsConsumerState.START;
+      state = TermsConsumerState.INITIAL;
+      assert text.equals(lastTerm);
+      assert stats.docFreq > 0; // otherwise, this method should not be called.
+      assert stats.docFreq == lastPostingsConsumer.docFreq;
+      sumDocFreq += stats.docFreq;
+      if (fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY) {
+        assert stats.totalTermFreq == -1;
+      } else {
+        assert stats.totalTermFreq == lastPostingsConsumer.totalTermFreq;
+        sumTotalTermFreq += stats.totalTermFreq;
+      }
+      in.finishTerm(text, stats);
+    }
+
+    @Override
+    public void finish(long sumTotalTermFreq, long sumDocFreq, int docCount) throws IOException {
+      assert state == TermsConsumerState.INITIAL || state == TermsConsumerState.START && lastPostingsConsumer.docFreq == 0;
+      state = TermsConsumerState.FINISHED;
+      assert docCount >= 0;
+      assert docCount == visitedDocs.cardinality();
+      assert sumDocFreq >= docCount;
+      assert sumDocFreq == this.sumDocFreq;
+      if (fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY) {
+        assert sumTotalTermFreq == -1;
+      } else {
+        assert sumTotalTermFreq >= sumDocFreq;
+        assert sumTotalTermFreq == this.sumTotalTermFreq;
+      }
+      in.finish(sumTotalTermFreq, sumDocFreq, docCount);
+    }
+
+    @Override
+    public Comparator<BytesRef> getComparator() throws IOException {
+      return in.getComparator();
+    }
+  }
+
+  static enum PostingsConsumerState { INITIAL, START };
+  static class AssertingPostingsConsumer extends PostingsConsumer {
+    private final PostingsConsumer in;
+    private final FieldInfo fieldInfo;
+    private final OpenBitSet visitedDocs;
+    private PostingsConsumerState state = PostingsConsumerState.INITIAL;
+    private int freq;
+    private int positionCount;
+    private int lastPosition = 0;
+    private int lastStartOffset = 0;
+    int docFreq = 0;
+    long totalTermFreq = 0;
+
+    AssertingPostingsConsumer(PostingsConsumer in, FieldInfo fieldInfo, OpenBitSet visitedDocs) {
+      this.in = in;
+      this.fieldInfo = fieldInfo;
+      this.visitedDocs = visitedDocs;
+    }
+
+    @Override
+    public void startDoc(int docID, int freq) throws IOException {
+      assert state == PostingsConsumerState.INITIAL;
+      state = PostingsConsumerState.START;
+      assert docID >= 0;
+      if (fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY) {
+        assert freq == -1;
+        this.freq = 0; // we don't expect any positions here
+      } else {
+        assert freq > 0;
+        this.freq = freq;
+        totalTermFreq += freq;
+      }
+      this.positionCount = 0;
+      this.lastPosition = 0;
+      this.lastStartOffset = 0;
+      docFreq++;
+      visitedDocs.set(docID);
+      in.startDoc(docID, freq);
+    }
+
+    @Override
+    public void addPosition(int position, BytesRef payload, int startOffset, int endOffset) throws IOException {
+      assert state == PostingsConsumerState.START;
+      assert positionCount < freq;
+      positionCount++;
+      assert position >= lastPosition || position == -1; /* we still allow -1 from old 3.x indexes */
+      lastPosition = position;
+      if (fieldInfo.getIndexOptions() == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) {
+        assert startOffset >= 0;
+        assert startOffset >= lastStartOffset;
+        lastStartOffset = startOffset;
+        assert endOffset >= startOffset;
+      } else {
+        assert startOffset == -1;
+        assert endOffset == -1;
+      }
+      if (payload != null) {
+        assert fieldInfo.hasPayloads();
+      }
+      in.addPosition(position, payload, startOffset, endOffset);
+    }
+
+    @Override
+    public void finishDoc() throws IOException {
+      assert state == PostingsConsumerState.START;
+      state = PostingsConsumerState.INITIAL;
+      if (fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
+        assert positionCount == 0; // we should not have fed any positions!
+      } else {
+        assert positionCount == freq;
+      }
+      in.finishDoc();
+    }
+  }
 }
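
The asserting wrapper only does work once it is routed into an index. A hypothetical test wiring (a sketch, not from the commit: it assumes the format is registered with SPI under the name "Asserting" passed to super() above, and that Lucene 4.0's Lucene40Codec per-field hook is overridden):

    IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_40, analyzer);
    iwc.setCodec(new Lucene40Codec() {
      @Override
      public PostingsFormat getPostingsFormatForField(String field) {
        return PostingsFormat.forName("Asserting"); // resolved via SPI by name
      }
    });
    IndexWriter w = new IndexWriter(dir, iwc);
    // every flush now runs the state-machine and -1-sentinel asserts above
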
@@ -131,6 +131,8 @@ Bug Fixes
 * SOLR-3663: There are a couple of bugs in the sync process when a leader goes down and a
   new leader is elected. (Mark Miller)
 
+* SOLR-3623: Fixed inconsistent treatment of third-party dependencies for
+  solr contribs analysis-extras & uima (hossman)
+
 Other Changes
 ----------------------
@@ -70,21 +70,32 @@
   -->
   <property name="solr.spec.version" value="5.0.0.${dateversion}" />
 
+  <path id="solr.lucene.libs">
+    <!-- List of jars that will be used as the foundation for both
+         the base classpath, as well as copied into the lucene-libs dir
+         in the release.
+    -->
+    <!-- NOTE: lucene-core is explicitly not included because of the
+         base.classpath (compilation & tests are done directly against
+         the class files w/o needing to build the jar)
+    -->
+    <pathelement location="${analyzers-common.jar}"/>
+    <pathelement location="${analyzers-kuromoji.jar}"/>
+    <pathelement location="${analyzers-phonetic.jar}"/>
+    <pathelement location="${highlighter.jar}"/>
+    <pathelement location="${memory.jar}"/>
+    <pathelement location="${misc.jar}"/>
+    <pathelement location="${spatial.jar}"/>
+    <pathelement location="${suggest.jar}"/>
+    <pathelement location="${grouping.jar}"/>
+    <pathelement location="${queries.jar}"/>
+    <pathelement location="${queryparser.jar}"/>
+  </path>
+
   <path id="solr.base.classpath">
-    <pathelement path="${analyzers-common.jar}"/>
-    <pathelement path="${analyzers-kuromoji.jar}"/>
-    <pathelement path="${analyzers-phonetic.jar}"/>
-    <pathelement path="${analyzers-uima.jar}"/>
-    <pathelement path="${highlighter.jar}"/>
-    <pathelement path="${memory.jar}"/>
-    <pathelement path="${misc.jar}"/>
-    <pathelement path="${spatial.jar}"/>
-    <pathelement path="${suggest.jar}"/>
-    <pathelement path="${grouping.jar}"/>
-    <pathelement path="${queries.jar}"/>
-    <pathelement path="${queryparser.jar}"/>
     <pathelement location="${common-solr.dir}/build/solr-solrj/classes/java"/>
     <pathelement location="${common-solr.dir}/build/solr-core/classes/java"/>
+    <path refid="solr.lucene.libs" />
     <path refid="additional.dependencies"/>
     <path refid="base.classpath"/>
   </path>
@@ -125,7 +136,7 @@
   </target>
 
   <target name="prep-lucene-jars"
-          depends="jar-lucene-core, jar-analyzers-phonetic, jar-analyzers-kuromoji, jar-analyzers-morfologik, jar-suggest, jar-highlighter, jar-memory,
+          depends="jar-lucene-core, jar-analyzers-phonetic, jar-analyzers-kuromoji, jar-suggest, jar-highlighter, jar-memory,
                    jar-misc, jar-spatial, jar-grouping, jar-queries, jar-queryparser">
     <property name="solr.deps.compiled" value="true"/>
   </target>
@@ -137,19 +148,11 @@
       <propertyset refid="uptodate.and.compiled.properties"/>
     </ant>
     <copy todir="${lucene-libs}" preservelastmodified="true" flatten="true" failonerror="true" overwrite="true">
+      <path refid="solr.lucene.libs" />
+      <!-- NOTE: lucene-core is not already included in "solr.lucene.libs"
+           because of its use in classpaths.
+      -->
       <fileset file="${lucene-core.jar}" />
-      <fileset file="${analyzers-common.jar}" />
-      <fileset file="${analyzers-kuromoji.jar}" />
-      <fileset file="${analyzers-phonetic.jar}" />
-      <fileset file="${suggest.jar}" />
-      <fileset file="${grouping.jar}" />
-      <fileset file="${queries.jar}" />
-      <fileset file="${queryparser.jar}" />
-      <fileset file="${highlighter.jar}" />
-      <fileset file="${memory.jar}" />
-      <fileset file="${misc.jar}" />
-      <fileset file="${spatial.jar}" />
-      <fileset refid="analyzers-morfologik.fileset" />
     </copy>
   </sequential>
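
The build refactor above replaces two hand-maintained jar lists with a single solr.lucene.libs path that is referenced both from the compile classpath and from the copy into lucene-libs, so the two lists can no longer drift apart; per the commit's own comments, lucene-core stays out of the shared list because the base classpath compiles and tests directly against its class files.
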
@@ -9,8 +9,11 @@ Relies upon the following lucene components (in lucene-libs/):
 * lucene-analyzers-icu-X.Y.jar
 * lucene-analyzers-smartcn-X.Y.jar
 * lucene-analyzers-stempel-X.Y.jar
+* lucene-analyzers-morfologik-X.Y.jar
+* lucene-analyzers-smartcn-X.Y.jar
 
-And the ICU library (in lib/):
+And the following third-party library (in lib/):
 
 * icu4j-X.Y.jar
+* morfologik-*.jar
 
@@ -25,12 +25,16 @@
 
   <import file="../contrib-build.xml"/>
 
+  <path id="analysis.extras.lucene.libs">
+    <pathelement location="${analyzers-icu.jar}"/>
+    <pathelement location="${analyzers-smartcn.jar}"/>
+    <pathelement location="${analyzers-stempel.jar}"/>
+    <pathelement location="${analyzers-morfologik.jar}"/>
+  </path>
+
   <path id="classpath">
     <fileset dir="lib" excludes="${common.classpath.excludes}"/>
-    <pathelement path="${analyzers-icu.jar}"/>
-    <pathelement path="${analyzers-smartcn.jar}"/>
-    <pathelement path="${analyzers-stempel.jar}"/>
-    <fileset refid="analyzers-morfologik.fileset" />
+    <path refid="analysis.extras.lucene.libs" />
     <path refid="solr.base.classpath"/>
   </path>
 
@@ -38,10 +42,7 @@
           depends="jar-analyzers-icu, jar-analyzers-smartcn, jar-analyzers-stempel, jar-analyzers-morfologik">
     <mkdir dir="${build.dir}/lucene-libs"/>
     <copy todir="${build.dir}/lucene-libs" preservelastmodified="true" flatten="true" failonerror="true" overwrite="true">
-      <fileset file="${analyzers-icu.jar}"/>
-      <fileset file="${analyzers-smartcn.jar}"/>
-      <fileset file="${analyzers-stempel.jar}"/>
-      <fileset refid="analyzers-morfologik.fileset" />
+      <path refid="analysis.extras.lucene.libs" />
     </copy>
   </target>
 
@@ -20,6 +20,9 @@
   <info organisation="org.apache.solr" module="analysis-extras"/>
   <dependencies>
     <dependency org="com.ibm.icu" name="icu4j" rev="4.8.1.1" transitive="false"/>
+    <dependency org="org.carrot2" name="morfologik-polish" rev="1.5.3" transitive="false"/>
+    <dependency org="org.carrot2" name="morfologik-fsa" rev="1.5.3" transitive="false"/>
+    <dependency org="org.carrot2" name="morfologik-stemming" rev="1.5.3" transitive="false"/>
     <exclude org="*" ext="*" matcher="regexp" type="${ivy.exclude.types}"/>
   </dependencies>
 </ivy-module>
@@ -0,0 +1 @@
+d1f729cd3019e6d86485226202f84458141a5688

@@ -0,0 +1,29 @@
+
+Copyright (c) 2006 Dawid Weiss
+Copyright (c) 2007-2012 Dawid Weiss, Marcin Miłkowski
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright notice,
+      this list of conditions and the following disclaimer in the documentation
+      and/or other materials provided with the distribution.
+
+    * Neither the name of Morfologik nor the names of its contributors
+      may be used to endorse or promote products derived from this software
+      without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

@@ -0,0 +1,2 @@
+This product includes BSD-licensed software developed by Dawid Weiss and Marcin Miłkowski
+(http://morfologik.blogspot.com/).

@@ -0,0 +1 @@
+8217b6f7ad018ceda0e824b2e60340000da4397a

@@ -0,0 +1,62 @@
+BSD-licensed dictionary of Polish (Morfologik)
+
+Copyright (c) 2012, Marcin Miłkowski
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+1. Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the
+   distribution.
+
+THIS SOFTWARE IS PROVIDED BY COPYRIGHT HOLDERS “AS IS” AND ANY EXPRESS
+OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL COPYRIGHT HOLDERS OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+--
+
+BSD-licensed dictionary of Polish (SGJP)
+http://sgjp.pl/morfeusz/
+
+Copyright © 2011 Zygmunt Saloni, Włodzimierz Gruszczyński,
+Marcin Woliński, Robert Wołosz
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+1. Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the
+   distribution.
+
+THIS SOFTWARE IS PROVIDED BY COPYRIGHT HOLDERS “AS IS” AND ANY EXPRESS
+OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL COPYRIGHT HOLDERS OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

@@ -0,0 +1,6 @@
+
+This product includes data from BSD-licensed dictionary of Polish (Morfologik)
+(http://morfologik.blogspot.com/)
+
+This product includes data from BSD-licensed dictionary of Polish (SGJP)
+(http://sgjp.pl/morfeusz/)

@@ -0,0 +1 @@
+c4ead57b78fa71b00553ff21da6fb5a326e914e8

@@ -0,0 +1,29 @@
+
+Copyright (c) 2006 Dawid Weiss
+Copyright (c) 2007-2012 Dawid Weiss, Marcin Miłkowski
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright notice,
+      this list of conditions and the following disclaimer in the documentation
+      and/or other materials provided with the distribution.
+
+    * Neither the name of Morfologik nor the names of its contributors
+      may be used to endorse or promote products derived from this software
+      without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

@@ -0,0 +1,2 @@
+This product includes BSD-licensed software developed by Dawid Weiss and Marcin Miłkowski
+(http://morfologik.blogspot.com/).
@@ -6,6 +6,7 @@ To start using Solr UIMA Metadata Extraction Library you should go through the f
    or set <lib/> tags in solrconfig.xml appropriately to point those jar files.
 
      <lib dir="../../contrib/uima/lib" />
+     <lib dir="../../contrib/uima/lucene-libs" />
      <lib dir="../../dist/" regex="apache-solr-uima-\d.*\.jar" />
 
 2. modify your schema.xml adding the fields you want to hold metadata, specifying proper values for type, indexed, stored and multiValued options:
@@ -191,7 +191,7 @@ public class LeaderElectionIntegrationTest extends SolrTestCaseJ4 {
     int newLeaderPort = getLeaderPort(leader);
     int retry = 0;
     while (leaderPort == newLeaderPort) {
-      if (retry++ == 20) {
+      if (retry++ == 60) {
         break;
       }
       Thread.sleep(1000);
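
Since each retry sleeps 1000 ms, raising the cap from 20 to 60 iterations extends the maximum wait for a new leader from roughly 20 seconds to 60 seconds before the test gives up.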