Upgrade to Lucene 4.2

This commit is contained in:
Simon Willnauer 2013-03-07 12:35:59 +01:00
parent 75fd6d4985
commit 11bf7a8b1a
14 changed files with 87 additions and 628 deletions

View File

@ -30,7 +30,7 @@
</parent>
<properties>
<lucene.version>4.1.0</lucene.version>
<lucene.version>4.2.0</lucene.version>
</properties>
<repositories>

View File

@ -25,7 +25,7 @@ import org.elasticsearch.common.lucene.search.Queries;
* <tt>minimumNumberShouldMatch</tt> specification that uses the actual number of high-frequency terms
* to calculate the minimum matching terms.
*/
public class ExtendedCommonTermsQuery extends XCommonTermsQuery {
public class ExtendedCommonTermsQuery extends CommonTermsQuery {
public ExtendedCommonTermsQuery(Occur highFreqOccur, Occur lowFreqOccur, float maxTermFrequency, boolean disableCoord) {
super(highFreqOccur, lowFreqOccur, maxTermFrequency, disableCoord);
@ -38,7 +38,7 @@ public class ExtendedCommonTermsQuery extends XCommonTermsQuery {
private String minNumShouldMatchSpec;
@Override
protected int getMinimumNumberShouldMatch(int numOptional) {
protected int calcLowFreqMinimumNumberShouldMatch(int numOptional) {
if (minNumShouldMatchSpec == null) {
return 0;
}

View File

@ -1,381 +0,0 @@
package org.apache.lucene.queries;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.util.ToStringUtils;
/**
 * A query that executes high-frequency terms in an optional sub-query to prevent
 * slow queries due to "common" terms like stopwords. This query basically
 * builds 2 queries off the {@link #add(Term) added} terms where low-frequency
 * terms are added to a required boolean clause and high-frequency terms are
 * added to an optional boolean clause. The optional clause is only executed if
 * the required "low-frequency" clause matches. Scores produced by this query
 * will be slightly different from the plain {@link BooleanQuery} scorer, mainly due to
 * differences in the {@link Similarity#coord(int,int) number of leaf queries}
 * in the required boolean clause. In most cases high-frequency terms are
 * unlikely to significantly contribute to the document score unless at least
 * one of the low-frequency terms is matched, such that this query can improve
 * query execution times significantly if applicable.
 * <p>
 * {@link XCommonTermsQuery} has several advantages over stopword filtering at
 * index or query time since a term can be "classified" based on the actual
 * document frequency in the index and can prevent slow queries even across
 * domains without specialized stopword files.
 * </p>
 * <p>
 * <b>Note:</b> if the query only contains high-frequency terms the query is
 * rewritten into a plain conjunction query, i.e. all high-frequency terms need to
 * match in order to match a document.
 * </p>
 */
//LUCENE MONITOR - Copied from CommonTermsQuery changes are tracked with //CHANGE
public class XCommonTermsQuery extends Query {

  /*
   * TODO maybe it would make sense to abstract this even further and allow to
   * rewrite to dismax rather than boolean. Yet, this can already be subclassed
   * to do so.
   */
  protected final List<Term> terms = new ArrayList<Term>();

  // when true, Similarity#coord(int,int) is disabled for the low/high frequency sub-queries
  protected final boolean disableCoord;
  // classification threshold: a fraction of maxDoc when < 1, otherwise an
  // absolute document-frequency count (see buildQuery for how both are applied)
  protected final float maxTermFrequency;
  protected final Occur lowFreqOccur;
  protected final Occur highFreqOccur;
  protected float lowFreqBoost = 1.0f;
  protected float highFreqBoost = 1.0f;
  //CHANGE made minNr... a float for fractions
  protected float minNrShouldMatch = 0;

  /**
   * Creates a new {@link XCommonTermsQuery}
   *
   * @param highFreqOccur
   *          {@link Occur} used for high frequency terms
   * @param lowFreqOccur
   *          {@link Occur} used for low frequency terms
   * @param maxTermFrequency
   *          a value in [0..1] (or absolute number >=1) representing the
   *          maximum threshold of a terms document frequency to be considered a
   *          low frequency term.
   * @throws IllegalArgumentException
   *           if {@link Occur#MUST_NOT} is passed as lowFreqOccur or
   *           highFreqOccur
   */
  public XCommonTermsQuery(Occur highFreqOccur, Occur lowFreqOccur,
      float maxTermFrequency) {
    this(highFreqOccur, lowFreqOccur, maxTermFrequency, false);
  }

  /**
   * Creates a new {@link XCommonTermsQuery}
   *
   * @param highFreqOccur
   *          {@link Occur} used for high frequency terms
   * @param lowFreqOccur
   *          {@link Occur} used for low frequency terms
   * @param maxTermFrequency
   *          a value in [0..1] (or absolute number >=1) representing the
   *          maximum threshold of a terms document frequency to be considered a
   *          low frequency term.
   * @param disableCoord
   *          disables {@link Similarity#coord(int,int)} in scoring for the low
   *          / high frequency sub-queries
   * @throws IllegalArgumentException
   *           if {@link Occur#MUST_NOT} is passed as lowFreqOccur or
   *           highFreqOccur
   */
  public XCommonTermsQuery(Occur highFreqOccur, Occur lowFreqOccur,
      float maxTermFrequency, boolean disableCoord) {
    // MUST_NOT makes no sense for either clause group: the query is built from
    // positive term matches only
    if (highFreqOccur == Occur.MUST_NOT) {
      throw new IllegalArgumentException(
          "highFreqOccur should be MUST or SHOULD but was MUST_NOT");
    }
    if (lowFreqOccur == Occur.MUST_NOT) {
      throw new IllegalArgumentException(
          "lowFreqOccur should be MUST or SHOULD but was MUST_NOT");
    }
    this.disableCoord = disableCoord;
    this.highFreqOccur = highFreqOccur;
    this.lowFreqOccur = lowFreqOccur;
    this.maxTermFrequency = maxTermFrequency;
  }

  /**
   * Adds a term to the {@link CommonTermsQuery}
   *
   * @param term
   *          the term to add
   * @throws IllegalArgumentException if the term is null
   */
  public void add(Term term) {
    if (term == null) {
      throw new IllegalArgumentException("Term must not be null");
    }
    this.terms.add(term);
  }

  @Override
  public Query rewrite(IndexReader reader) throws IOException {
    // degenerate cases: no terms matches nothing, a single term is a plain TermQuery
    if (this.terms.isEmpty()) {
      return new BooleanQuery();
    } else if (this.terms.size() == 1) {
      final TermQuery tq = new TermQuery(this.terms.get(0));
      tq.setBoost(getBoost());
      return tq;
    }
    final List<AtomicReaderContext> leaves = reader.leaves();
    final int maxDoc = reader.maxDoc();
    // one TermContext slot per query term; a slot stays null if the term does
    // not occur in any segment
    final TermContext[] contextArray = new TermContext[terms.size()];
    final Term[] queryTerms = this.terms.toArray(new Term[0]);
    collectTermContext(reader, leaves, contextArray, queryTerms);
    return buildQuery(maxDoc, contextArray, queryTerms);
  }

  //CHANGE added to get num optional
  // Resolves minNrShouldMatch: values >= 1 are an absolute clause count,
  // values in (0,1) are a fraction of the optional clause count (truncated).
  protected int getMinimumNumberShouldMatch(int numOptional) {
    if (minNrShouldMatch >= 1.0f) {
      return (int) minNrShouldMatch;
    }
    return (int) (minNrShouldMatch * numOptional);
  }

  // Partitions the terms into a low-frequency and a high-frequency boolean
  // query and combines them; see the class javadoc for the overall strategy.
  protected Query buildQuery(final int maxDoc,
      final TermContext[] contextArray, final Term[] queryTerms) {
    BooleanQuery lowFreq = new BooleanQuery(disableCoord);
    BooleanQuery highFreq = new BooleanQuery(disableCoord);
    highFreq.setBoost(highFreqBoost);
    lowFreq.setBoost(lowFreqBoost);
    // top-level query always disables coord (see isCoordDisabled javadoc)
    BooleanQuery query = new BooleanQuery(true);
    for (int i = 0; i < queryTerms.length; i++) {
      TermContext termContext = contextArray[i];
      if (termContext == null) {
        // term does not exist in the index: treat as low-frequency so the
        // required clause correctly matches nothing
        lowFreq.add(new TermQuery(queryTerms[i]), lowFreqOccur);
      } else {
        // high-frequency if docFreq exceeds the absolute threshold (>= 1)
        // or the fractional threshold scaled by maxDoc
        if ((maxTermFrequency >= 1f && termContext.docFreq() > maxTermFrequency)
            || (termContext.docFreq() > (int) Math.ceil(maxTermFrequency
                * (float) maxDoc))) {
          highFreq
              .add(new TermQuery(queryTerms[i], termContext), highFreqOccur);
        } else {
          lowFreq.add(new TermQuery(queryTerms[i], termContext), lowFreqOccur);
        }
      }
    }
    if (lowFreqOccur == Occur.SHOULD) {
      lowFreq.setMinimumNumberShouldMatch(getMinimumNumberShouldMatch(lowFreq.clauses().size()));
    }
    if (lowFreq.clauses().isEmpty()) {
      /*
       * if lowFreq is empty we rewrite the high freq terms in a conjunction to
       * prevent slow queries.
       */
      if (highFreqOccur == Occur.MUST) {
        highFreq.setBoost(getBoost());
        return highFreq;
      } else {
        // force all high-frequency terms to be required
        BooleanQuery highFreqConjunction = new BooleanQuery();
        for (BooleanClause booleanClause : highFreq) {
          highFreqConjunction.add(booleanClause.getQuery(), Occur.MUST);
        }
        highFreqConjunction.setBoost(getBoost());
        return highFreqConjunction;
      }
    } else if (highFreq.clauses().isEmpty()) {
      // only do low freq terms - we don't have high freq terms
      lowFreq.setBoost(getBoost());
      return lowFreq;
    } else {
      query.add(highFreq, Occur.SHOULD);
      query.add(lowFreq, Occur.MUST);
      query.setBoost(getBoost());
      return query;
    }
  }

  // Accumulates per-segment term statistics (docFreq/totalTermFreq and term
  // states) for each query term into contextArray, indexed parallel to queryTerms.
  public void collectTermContext(IndexReader reader,
      List<AtomicReaderContext> leaves, TermContext[] contextArray,
      Term[] queryTerms) throws IOException {
    TermsEnum termsEnum = null;
    for (AtomicReaderContext context : leaves) {
      final Fields fields = context.reader().fields();
      if (fields == null) {
        // reader has no fields
        continue;
      }
      for (int i = 0; i < queryTerms.length; i++) {
        Term term = queryTerms[i];
        TermContext termContext = contextArray[i];
        final Terms terms = fields.terms(term.field());
        if (terms == null) {
          // field does not exist
          continue;
        }
        // reuse the enum across terms/segments where the codec allows it
        termsEnum = terms.iterator(termsEnum);
        assert termsEnum != null;
        if (termsEnum == TermsEnum.EMPTY) continue;
        // seekExact(..., false): no need to cache the term state in the enum,
        // we capture it via termState() below
        if (termsEnum.seekExact(term.bytes(), false)) {
          if (termContext == null) {
            // first segment that contains this term
            contextArray[i] = new TermContext(reader.getContext(),
                termsEnum.termState(), context.ord, termsEnum.docFreq(),
                termsEnum.totalTermFreq());
          } else {
            // add this segment's statistics to the existing context
            termContext.register(termsEnum.termState(), context.ord,
                termsEnum.docFreq(), termsEnum.totalTermFreq());
          }
        }
      }
    }
  }

  /**
   * Returns true iff {@link Similarity#coord(int,int)} is disabled in scoring
   * for the high and low frequency query instance. The top level query will
   * always disable coords.
   */
  public boolean isCoordDisabled() {
    return disableCoord;
  }

  /**
   * Specifies a minimum number of the optional BooleanClauses which must be
   * satisfied in order to produce a match on the low frequency terms query
   * part.
   *
   * <p>
   * By default no optional clauses are necessary for a match (unless there are
   * no required clauses). If this method is used, then the specified number of
   * clauses is required.
   * </p>
   *
   * @param min
   *          the number of optional clauses that must match; values in (0,1)
   *          are interpreted as a fraction of the optional clauses
   */
  //CHANGE accepts now a float
  public void setMinimumNumberShouldMatch(float min) {
    this.minNrShouldMatch = min;
  }

  /**
   * Gets the minimum number of the optional BooleanClauses which must be
   * satisfied.
   */
  //CHANGE returns now a float
  public float getMinimumNumberShouldMatch() {
    return minNrShouldMatch;
  }

  @Override
  public void extractTerms(Set<Term> terms) {
    terms.addAll(this.terms);
  }

  @Override
  public String toString(String field) {
    StringBuilder buffer = new StringBuilder();
    boolean needParens = (getBoost() != 1.0)
        || (getMinimumNumberShouldMatch() > 0);
    if (needParens) {
      buffer.append("(");
    }
    for (int i = 0; i < terms.size(); i++) {
      Term t = terms.get(i);
      buffer.append(new TermQuery(t).toString());
      if (i != terms.size() - 1) buffer.append(", ");
    }
    if (needParens) {
      buffer.append(")");
    }
    if (getMinimumNumberShouldMatch() > 0) {
      buffer.append('~');
      buffer.append(getMinimumNumberShouldMatch());
    }
    if (getBoost() != 1.0f) {
      buffer.append(ToStringUtils.boost(getBoost()));
    }
    return buffer.toString();
  }

  @Override
  public int hashCode() {
    final int prime = 31;
    int result = super.hashCode();
    result = prime * result + (disableCoord ? 1231 : 1237);
    result = prime * result + Float.floatToIntBits(highFreqBoost);
    result = prime * result
        + ((highFreqOccur == null) ? 0 : highFreqOccur.hashCode());
    result = prime * result + Float.floatToIntBits(lowFreqBoost);
    result = prime * result
        + ((lowFreqOccur == null) ? 0 : lowFreqOccur.hashCode());
    result = prime * result + Float.floatToIntBits(maxTermFrequency);
    result = prime * result + Float.floatToIntBits(minNrShouldMatch);
    result = prime * result + ((terms == null) ? 0 : terms.hashCode());
    return result;
  }

  @Override
  public boolean equals(Object obj) {
    if (this == obj) return true;
    if (!super.equals(obj)) return false;
    if (getClass() != obj.getClass()) return false;
    XCommonTermsQuery other = (XCommonTermsQuery) obj;
    if (disableCoord != other.disableCoord) return false;
    if (Float.floatToIntBits(highFreqBoost) != Float
        .floatToIntBits(other.highFreqBoost)) return false;
    if (highFreqOccur != other.highFreqOccur) return false;
    if (Float.floatToIntBits(lowFreqBoost) != Float
        .floatToIntBits(other.lowFreqBoost)) return false;
    if (lowFreqOccur != other.lowFreqOccur) return false;
    if (Float.floatToIntBits(maxTermFrequency) != Float
        .floatToIntBits(other.maxTermFrequency)) return false;
    if (minNrShouldMatch != other.minNrShouldMatch) return false;
    if (terms == null) {
      if (other.terms != null) return false;
    } else if (!terms.equals(other.terms)) return false;
    return true;
  }

  //CHANGE added
  /** Returns the (mutable) list of terms added so far. */
  public List<Term> terms() {
    return this.terms;
  }
}

View File

@ -96,8 +96,6 @@ public class CustomFieldQuery extends FieldQuery {
flatten(sourceQuery.rewrite(reader), reader, flatQueries);
} else if (sourceQuery instanceof FiltersFunctionScoreQuery) {
flatten(((FiltersFunctionScoreQuery) sourceQuery).getSubQuery(), reader, flatQueries);
} else if (sourceQuery instanceof ExtendedCommonTermsQuery) {
flatten(((ExtendedCommonTermsQuery)sourceQuery).rewrite(reader), reader, flatQueries);
} else if (sourceQuery instanceof MultiPhraseQuery) {
MultiPhraseQuery q = ((MultiPhraseQuery) sourceQuery);
convertMultiPhraseQuery(0, new int[q.getTermArrays().size()] , q, q.getTermArrays(), q.getPositions(), reader, flatQueries);

View File

@ -1,151 +0,0 @@
package org.apache.lucene.store;
import java.io.IOException;
/**
 * Exactly the same as Lucene {@link BufferedIndexOutput} but with the ability to set the buffer size
 */
// LUCENE MONITOR
public abstract class OpenBufferedIndexOutput extends IndexOutput {

  public static final int DEFAULT_BUFFER_SIZE = BufferedIndexOutput.BUFFER_SIZE;

  // configurable buffer size, fixed at construction time
  final int BUFFER_SIZE;

  private final byte[] buffer;
  private long bufferStart = 0; // position in file of buffer
  private int bufferPosition = 0; // position in buffer

  protected OpenBufferedIndexOutput(int BUFFER_SIZE) {
    this.BUFFER_SIZE = BUFFER_SIZE;
    this.buffer = new byte[BUFFER_SIZE];
  }

  /**
   * Writes a single byte.
   *
   * @see IndexInput#readByte()
   */
  @Override
  public void writeByte(byte b) throws IOException {
    if (bufferPosition >= BUFFER_SIZE)
      flush();
    buffer[bufferPosition++] = b;
  }

  /**
   * Writes an array of bytes.
   *
   * @param b the bytes to write
   * @param length the number of bytes to write
   * @see IndexInput#readBytes(byte[], int, int)
   */
  @Override
  public void writeBytes(byte[] b, int offset, int length) throws IOException {
    int bytesLeft = BUFFER_SIZE - bufferPosition;
    // is there enough space in the buffer?
    if (bytesLeft >= length) {
      // we add the data to the end of the buffer
      System.arraycopy(b, offset, buffer, bufferPosition, length);
      bufferPosition += length;
      // if the buffer is full, flush it
      if (BUFFER_SIZE - bufferPosition == 0)
        flush();
    } else {
      // is data larger than buffer?
      if (length > BUFFER_SIZE) {
        // we flush the buffer
        if (bufferPosition > 0)
          flush();
        // and write data at once, bypassing the buffer entirely;
        // bufferStart must be advanced manually since flush() was not used
        flushBuffer(b, offset, length);
        bufferStart += length;
      } else {
        // we fill/flush the buffer (until the input is written)
        int pos = 0; // position in the input data
        int pieceLength;
        while (pos < length) {
          pieceLength = (length - pos < bytesLeft) ? length - pos : bytesLeft;
          System.arraycopy(b, pos + offset, buffer, bufferPosition, pieceLength);
          pos += pieceLength;
          bufferPosition += pieceLength;
          // if the buffer is full, flush it
          bytesLeft = BUFFER_SIZE - bufferPosition;
          if (bytesLeft == 0) {
            flush();
            bytesLeft = BUFFER_SIZE;
          }
        }
      }
    }
  }

  /**
   * Forces any buffered output to be written.
   */
  @Override
  public void flush() throws IOException {
    flushBuffer(buffer, bufferPosition);
    bufferStart += bufferPosition;
    bufferPosition = 0;
  }

  /**
   * Expert: implements buffer write. Writes bytes at the current position in
   * the output.
   *
   * @param b the bytes to write
   * @param len the number of bytes to write
   */
  private void flushBuffer(byte[] b, int len) throws IOException {
    flushBuffer(b, 0, len);
  }

  /**
   * Expert: implements buffer write. Writes bytes at the current position in
   * the output.
   *
   * @param b the bytes to write
   * @param offset the offset in the byte array
   * @param len the number of bytes to write
   */
  protected abstract void flushBuffer(byte[] b, int offset, int len) throws IOException;

  /**
   * Closes this stream to further operations.
   */
  @Override
  public void close() throws IOException {
    // push any remaining buffered bytes to the underlying output
    flush();
  }

  /**
   * Returns the current position in this file, where the next write will
   * occur.
   *
   * @see #seek(long)
   */
  @Override
  public long getFilePointer() {
    return bufferStart + bufferPosition;
  }

  /**
   * Sets current position in this file, where the next write will occur.
   *
   * @see #getFilePointer()
   */
  @Override
  public void seek(long pos) throws IOException {
    // flush first so buffered bytes land at their original position,
    // then restart the buffer at the new position
    flush();
    bufferStart = pos;
  }

  /**
   * The number of bytes in the file.
   */
  @Override
  public abstract long length() throws IOException;
}

View File

@ -42,7 +42,7 @@ import java.lang.reflect.Field;
*/
public class Lucene {
public static final Version VERSION = Version.LUCENE_41;
public static final Version VERSION = Version.LUCENE_42;
public static final Version ANALYZER_VERSION = VERSION;
public static final Version QUERYPARSER_VERSION = VERSION;
@ -57,6 +57,9 @@ public class Lucene {
if (version == null) {
return defaultVersion;
}
if ("4.2".equals(version)) {
return Version.LUCENE_42;
}
if ("4.1".equals(version)) {
return Version.LUCENE_41;
}

View File

@ -19,15 +19,15 @@
package org.elasticsearch.common.lucene.store;
import org.apache.lucene.store.BufferedIndexOutput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.OpenBufferedIndexOutput;
import java.io.IOException;
import java.util.zip.Checksum;
/**
*/
public class BufferedChecksumIndexOutput extends OpenBufferedIndexOutput {
public class BufferedChecksumIndexOutput extends BufferedIndexOutput {
private final IndexOutput out;
@ -36,7 +36,7 @@ public class BufferedChecksumIndexOutput extends OpenBufferedIndexOutput {
public BufferedChecksumIndexOutput(IndexOutput out, Checksum digest) {
// we add 64 to be bigger than the default BufferedIndexOutput buffer size so any flush will go directly
// to the output without being copied over to the delegate buffer
super(OpenBufferedIndexOutput.DEFAULT_BUFFER_SIZE + 64);
super(BufferedIndexOutput.DEFAULT_BUFFER_SIZE + 64);
this.out = out;
this.digest = digest;
}

View File

@ -20,8 +20,7 @@
package org.elasticsearch.index.codec;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.lucene40.Lucene40Codec;
import org.apache.lucene.codecs.lucene41.Lucene41Codec;
import org.apache.lucene.codecs.lucene42.Lucene42Codec;
import org.elasticsearch.index.codec.postingsformat.PostingsFormatProvider;
import org.elasticsearch.index.mapper.MapperService;
@ -34,7 +33,7 @@ import org.elasticsearch.index.mapper.MapperService;
* configured for a specific field the default postings format is used.
*/
// LUCENE UPGRADE: make sure to move to a new codec depending on the lucene version
public class PerFieldMappingPostingFormatCodec extends Lucene41Codec {
public class PerFieldMappingPostingFormatCodec extends Lucene42Codec {
private final MapperService mapperService;
private final PostingsFormat defaultPostingFormat;

View File

@ -111,7 +111,7 @@ public final class BloomFilterPostingsFormat extends PostingsFormat {
IndexInput bloomIn = null;
boolean success = false;
try {
bloomIn = state.dir.openInput(bloomFileName, state.context);
bloomIn = state.directory.openInput(bloomFileName, state.context);
CodecUtil.checkHeader(bloomIn, BLOOM_CODEC_NAME, BLOOM_CODEC_VERSION,
BLOOM_CODEC_VERSION);
// // Load the hash function used in the BloomFilter
@ -209,7 +209,7 @@ public final class BloomFilterPostingsFormat extends PostingsFormat {
}
@Override
public Comparator<BytesRef> getComparator() throws IOException {
public Comparator<BytesRef> getComparator() {
return delegateTerms.getComparator();
}

View File

@ -48,11 +48,13 @@ public class PagedBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<S
private int[] hashes;
private long size = -1;
private final long readerBytesSize;
public PagedBytesAtomicFieldData(PagedBytes.Reader bytes, PackedInts.Reader termOrdToBytesOffset, Ordinals ordinals) {
public PagedBytesAtomicFieldData(PagedBytes.Reader bytes, long readerBytesSize, PackedInts.Reader termOrdToBytesOffset, Ordinals ordinals) {
this.bytes = bytes;
this.termOrdToBytesOffset = termOrdToBytesOffset;
this.ordinals = ordinals;
this.readerBytesSize = readerBytesSize;
}
@Override
@ -79,10 +81,7 @@ public class PagedBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<S
if (size == -1) {
long size = ordinals.getMemorySizeInBytes();
// PackedBytes
size += RamUsage.NUM_BYTES_ARRAY_HEADER + bytes.getBlocks().length;
for (byte[] b : bytes.getBlocks()) {
size += b.length;
}
size += readerBytesSize;
// PackedInts
size += termOrdToBytesOffset.ramBytesUsed();
this.size = size;
@ -102,8 +101,8 @@ public class PagedBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<S
int[] hashes = new int[numberOfValues];
BytesRef scratch = new BytesRef();
for (int i = 0; i < numberOfValues; i++) {
BytesRef value = bytes.fill(scratch, termOrdToBytesOffset.get(i));
hashes[i] = value == null ? 0 : value.hashCode();
bytes.fill(scratch, termOrdToBytesOffset.get(i));
hashes[i] = scratch.hashCode();
}
this.hashes = hashes;
}
@ -141,17 +140,21 @@ public class PagedBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<S
@Override
public BytesRef getValueByOrd(int ord) {
return bytes.fill(scratch, termOrdToBytesOffset.get(ord));
bytes.fill(scratch, termOrdToBytesOffset.get(ord));
return scratch;
}
@Override
public BytesRef getValueScratchByOrd(int ord, BytesRef ret) {
return bytes.fill(ret, termOrdToBytesOffset.get(ord));
bytes.fill(ret, termOrdToBytesOffset.get(ord));
return ret;
}
@Override
public BytesRef getSafeValueByOrd(int ord) {
return bytes.fill(new BytesRef(), termOrdToBytesOffset.get(ord));
final BytesRef retVal = new BytesRef();
bytes.fill(retVal, termOrdToBytesOffset.get(ord));
return retVal;
}
@Override
@ -168,12 +171,14 @@ public class PagedBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<S
public BytesRef getValue(int docId) {
int ord = ordinals.getOrd(docId);
if (ord == 0) return null;
return bytes.fill(scratch, termOrdToBytesOffset.get(ord));
bytes.fill(scratch, termOrdToBytesOffset.get(ord));
return scratch;
}
@Override
public BytesRef getValueScratch(int docId, BytesRef ret) {
return bytes.fill(ret, termOrdToBytesOffset.get(ordinals.getOrd(docId)));
bytes.fill(ret, termOrdToBytesOffset.get(ordinals.getOrd(docId)));
return ret;
}
static class Single extends BytesValues {
@ -194,7 +199,8 @@ public class PagedBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<S
public BytesRefArrayRef getValues(int docId) {
int ord = ordinals.getOrd(docId);
if (ord == 0) return BytesRefArrayRef.EMPTY;
arrayScratch.values[0] = bytes.fill(new BytesRef(), termOrdToBytesOffset.get(ord));
arrayScratch.values[0] = new BytesRef();
bytes.fill(arrayScratch.values[0], termOrdToBytesOffset.get(ord));
return arrayScratch;
}
@ -202,7 +208,8 @@ public class PagedBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<S
public Iter getIter(int docId) {
int ord = ordinals.getOrd(docId);
if (ord == 0) return Iter.Empty.INSTANCE;
return iter.reset(bytes.fill(scratch, termOrdToBytesOffset.get(ord)));
bytes.fill(scratch, termOrdToBytesOffset.get(ord));
return iter.reset(scratch);
}
@Override
@ -211,7 +218,8 @@ public class PagedBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<S
if (ord == 0) {
proc.onMissing(docId);
} else {
proc.onValue(docId, bytes.fill(scratch, termOrdToBytesOffset.get(ord)));
bytes.fill(scratch, termOrdToBytesOffset.get(ord));
proc.onValue(docId, scratch);
}
}
}
@ -239,7 +247,9 @@ public class PagedBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<S
arrayScratch.reset(size);
for (int i = ords.start; i < ords.end; i++) {
arrayScratch.values[arrayScratch.end++] = bytes.fill(new BytesRef(), termOrdToBytesOffset.get(ords.values[i]));
final BytesRef bytesRef = new BytesRef();
bytes.fill(bytesRef, termOrdToBytesOffset.get(ords.values[i]));
arrayScratch.values[arrayScratch.end++] = bytesRef;
}
return arrayScratch;
}
@ -258,7 +268,8 @@ public class PagedBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<S
return;
}
do {
proc.onValue(docId, bytes.fill(scratch, termOrdToBytesOffset.get(ord)));
bytes.fill(scratch, termOrdToBytesOffset.get(ord));
proc.onValue(docId, scratch);
} while ((ord = iter.next()) != 0);
}
@ -288,9 +299,9 @@ public class PagedBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<S
@Override
public BytesRef next() {
BytesRef value = bytes.fill(scratch, termOrdToBytesOffset.get(ord));
bytes.fill(scratch, termOrdToBytesOffset.get(ord));
ord = ordsIter.next();
return value;
return scratch;
}
}
}
@ -320,12 +331,15 @@ public class PagedBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<S
@Override
public HashedBytesRef getValueByOrd(int ord) {
return scratch.reset(bytes.fill(scratch1, termOrdToBytesOffset.get(ord)), hashes[ord]);
bytes.fill(scratch1, termOrdToBytesOffset.get(ord));
return scratch.reset(scratch1, hashes[ord]);
}
@Override
public HashedBytesRef getSafeValueByOrd(int ord) {
return new HashedBytesRef(bytes.fill(new BytesRef(), termOrdToBytesOffset.get(ord)), hashes[ord]);
final BytesRef bytesRef = new BytesRef();
bytes.fill(bytesRef, termOrdToBytesOffset.get(ord));
return new HashedBytesRef(bytesRef, hashes[ord]);
}
@Override
@ -342,7 +356,8 @@ public class PagedBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<S
public HashedBytesRef getValue(int docId) {
int ord = ordinals.getOrd(docId);
if (ord == 0) return null;
return scratch.reset(bytes.fill(scratch1, termOrdToBytesOffset.get(ord)), hashes[ord]);
bytes.fill(scratch1, termOrdToBytesOffset.get(ord));
return scratch.reset(scratch1, hashes[ord]);
}
static class Single extends HashedBytesValues {
@ -362,7 +377,8 @@ public class PagedBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<S
public Iter getIter(int docId) {
int ord = ordinals.getOrd(docId);
if (ord == 0) return Iter.Empty.INSTANCE;
return iter.reset(scratch.reset(bytes.fill(scratch1, termOrdToBytesOffset.get(ord)), hashes[ord]));
bytes.fill(scratch1, termOrdToBytesOffset.get(ord));
return iter.reset(scratch.reset(scratch1, hashes[ord]));
}
@Override
@ -371,7 +387,8 @@ public class PagedBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<S
if (ord == 0) {
proc.onMissing(docId);
} else {
proc.onValue(docId, scratch.reset(bytes.fill(scratch1, termOrdToBytesOffset.get(ord)), hashes[ord]));
bytes.fill(scratch1, termOrdToBytesOffset.get(ord));
proc.onValue(docId, scratch.reset(scratch1, hashes[ord]));
}
}
}
@ -404,7 +421,8 @@ public class PagedBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<S
return;
}
do {
proc.onValue(docId, scratch.reset(bytes.fill(scratch1, termOrdToBytesOffset.get(ord)), hashes[ord]));
bytes.fill(scratch1, termOrdToBytesOffset.get(ord));
proc.onValue(docId, scratch.reset(scratch1, hashes[ord]));
} while ((ord = iter.next()) != 0);
}
@ -438,7 +456,8 @@ public class PagedBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<S
@Override
public HashedBytesRef next() {
HashedBytesRef value = scratch.reset(bytes.fill(scratch1, termOrdToBytesOffset.get(ord)), hashes[ord]);
bytes.fill(scratch1, termOrdToBytesOffset.get(ord));
HashedBytesRef value = scratch.reset(scratch1, hashes[ord]);
ord = ordsIter.next();
return value;
}
@ -467,8 +486,8 @@ public class PagedBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<S
@Override
public String getValueByOrd(int ord) {
BytesRef value = bytes.fill(scratch, termOrdToBytesOffset.get(ord));
return value.utf8ToString();
bytes.fill(scratch, termOrdToBytesOffset.get(ord));
return scratch.utf8ToString();
}
@Override
@ -480,8 +499,8 @@ public class PagedBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<S
public String getValue(int docId) {
int ord = ordinals.getOrd(docId);
if (ord == 0) return null;
BytesRef value = bytes.fill(scratch, termOrdToBytesOffset.get(ord));
return value.utf8ToString();
bytes.fill(scratch, termOrdToBytesOffset.get(ord));
return scratch.utf8ToString();
}
static class Single extends StringValues {
@ -502,8 +521,8 @@ public class PagedBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<S
public StringArrayRef getValues(int docId) {
int ord = ordinals.getOrd(docId);
if (ord == 0) return StringArrayRef.EMPTY;
BytesRef value = bytes.fill(scratch, termOrdToBytesOffset.get(ord));
arrayScratch.values[0] = value == null ? null : value.utf8ToString();
bytes.fill(scratch, termOrdToBytesOffset.get(ord));
arrayScratch.values[0] = scratch.utf8ToString();
return arrayScratch;
}
@ -511,7 +530,8 @@ public class PagedBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<S
public Iter getIter(int docId) {
int ord = ordinals.getOrd(docId);
if (ord == 0) return Iter.Empty.INSTANCE;
return iter.reset(bytes.fill(scratch, termOrdToBytesOffset.get(ord)).utf8ToString());
bytes.fill(scratch, termOrdToBytesOffset.get(ord));
return iter.reset(scratch.utf8ToString());
}
@Override
@ -521,7 +541,8 @@ public class PagedBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<S
proc.onMissing(docId);
return;
}
proc.onValue(docId, bytes.fill(scratch, termOrdToBytesOffset.get(ord)).utf8ToString());
bytes.fill(scratch, termOrdToBytesOffset.get(ord));
proc.onValue(docId, scratch.utf8ToString());
}
}
@ -548,8 +569,8 @@ public class PagedBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<S
arrayScratch.reset(size);
for (int i = ords.start; i < ords.end; i++) {
BytesRef value = bytes.fill(scratch, termOrdToBytesOffset.get(ords.values[i]));
arrayScratch.values[arrayScratch.end++] = value == null ? null : value.utf8ToString();
bytes.fill(scratch, termOrdToBytesOffset.get(ords.values[i]));
arrayScratch.values[arrayScratch.end++] = scratch.utf8ToString();
}
return arrayScratch;
}
@ -568,8 +589,8 @@ public class PagedBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<S
return;
}
do {
BytesRef value = bytes.fill(scratch, termOrdToBytesOffset.get(ord));
proc.onValue(docId, value == null ? null : value.utf8ToString());
bytes.fill(scratch, termOrdToBytesOffset.get(ord));
proc.onValue(docId, scratch.utf8ToString());
} while ((ord = iter.next()) != 0);
}
@ -599,9 +620,9 @@ public class PagedBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<S
@Override
public String next() {
BytesRef value = bytes.fill(scratch, termOrdToBytesOffset.get(ord));
bytes.fill(scratch, termOrdToBytesOffset.get(ord));
ord = ordsIter.next();
return value == null ? null : value.utf8ToString();
return scratch.utf8ToString();
}
}
}
@ -610,7 +631,7 @@ public class PagedBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<S
static class Empty extends PagedBytesAtomicFieldData {
Empty(int numDocs) {
super(emptyBytes(), new GrowableWriter(1, 2, PackedInts.FASTEST).getMutable(), new EmptyOrdinals(numDocs));
super(emptyBytes(), 0, new GrowableWriter(1, 2, PackedInts.FASTEST).getMutable(), new EmptyOrdinals(numDocs));
}
static PagedBytes.Reader emptyBytes() {

View File

@ -142,11 +142,12 @@ public class PagedBytesIndexFieldData extends AbstractIndexFieldData<PagedBytesA
builder.addDoc(docId);
}
}
final long sizePointer = bytes.getPointer();
PagedBytes.Reader bytesReader = bytes.freeze(true);
PackedInts.Reader termOrdToBytesOffsetReader = termOrdToBytesOffset.getMutable();
final Ordinals ordinals = builder.build(fieldDataType.getSettings());
return new PagedBytesAtomicFieldData(bytesReader, termOrdToBytesOffsetReader, ordinals);
return new PagedBytesAtomicFieldData(bytesReader, sizePointer, termOrdToBytesOffsetReader, ordinals);
} finally {
builder.close();
}

View File

@ -19,28 +19,18 @@
package org.elasticsearch.search.highlight;
import org.apache.lucene.index.AtomicReader;
import java.io.IOException;
import java.util.Map;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.XCommonTermsQuery;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.FilteredQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.WeightedSpanTerm;
import org.apache.lucene.search.highlight.WeightedSpanTermExtractor;
import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery;
import org.elasticsearch.common.lucene.search.XFilteredQuery;
import org.elasticsearch.common.lucene.search.function.FiltersFunctionScoreQuery;
import org.elasticsearch.common.lucene.search.function.FunctionScoreQuery;
import java.io.IOException;
import java.util.List;
import java.util.Map;
public final class CustomQueryScorer extends QueryScorer {
public CustomQueryScorer(Query query, IndexReader reader, String field,
@ -93,30 +83,9 @@ public final class CustomQueryScorer extends QueryScorer {
} else if (query instanceof FiltersFunctionScoreQuery) {
query = ((FiltersFunctionScoreQuery) query).getSubQuery();
extract(query, terms);
} else if (query instanceof ConstantScoreQuery) {
ConstantScoreQuery q = (ConstantScoreQuery) query;
if (q.getQuery() != null) {
query = q.getQuery();
extract(query, terms);
}
} else if (query instanceof FilteredQuery) {
query = ((FilteredQuery) query).getQuery();
extract(query, terms);
} else if (query instanceof XFilteredQuery) {
query = ((XFilteredQuery) query).getQuery();
extract(query, terms);
} else if (query instanceof XCommonTermsQuery) {
XCommonTermsQuery ctq = ((XCommonTermsQuery)query);
List<Term> ctqTerms = ctq.terms();
BooleanQuery bq = new BooleanQuery();
for (Term term : ctqTerms) {
bq.add(new TermQuery(term), Occur.SHOULD);
}
extract(bq, terms);
} else if (query instanceof MultiPhrasePrefixQuery) {
MultiPhrasePrefixQuery q = ((MultiPhrasePrefixQuery)query);
AtomicReader atomicReader = getLeafContextForField(q.getField()).reader();
extract(q.rewrite(atomicReader), terms);
}
}

View File

@ -23,6 +23,7 @@ import org.apache.lucene.document.Document;
import org.apache.lucene.document.IntField;
import org.apache.lucene.index.*;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.search.FieldCache.Ints;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.elasticsearch.common.lucene.Lucene;
@ -56,8 +57,7 @@ public class LuceneFieldCacheTests {
indexWriter.addDocument(doc);
AtomicReader reader = SlowCompositeReaderWrapper.wrap(IndexReader.open(indexWriter, true));
int[] ints = FieldCache.DEFAULT.getInts(reader, "int1", false);
assertThat(ints.length, equalTo(1));
assertThat(ints[0], equalTo(2));
Ints ints = FieldCache.DEFAULT.getInts(reader, "int1", false);
assertThat(ints.get(0), equalTo(2));
}
}

View File

@ -26,7 +26,7 @@ import org.apache.lucene.queries.TermsFilter;
import org.apache.lucene.sandbox.queries.FuzzyLikeThisQuery;
import org.apache.lucene.search.*;
import org.apache.lucene.search.spans.*;
import org.apache.lucene.spatial.prefix.RecursivePrefixTreeFilter;
import org.apache.lucene.spatial.prefix.IntersectsPrefixTreeFilter;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.NumericUtils;
import org.elasticsearch.cluster.ClusterService;
@ -2027,7 +2027,7 @@ public class SimpleIndexQueryParserTests {
Query parsedQuery = queryParser.parse(query).query();
assertThat(parsedQuery, instanceOf(XConstantScoreQuery.class));
XConstantScoreQuery constantScoreQuery = (XConstantScoreQuery) parsedQuery;
assertThat(constantScoreQuery.getFilter(), instanceOf(RecursivePrefixTreeFilter.class));
assertThat(constantScoreQuery.getFilter(), instanceOf(IntersectsPrefixTreeFilter.class));
}
@Test
@ -2037,6 +2037,6 @@ public class SimpleIndexQueryParserTests {
Query parsedQuery = queryParser.parse(query).query();
assertThat(parsedQuery, instanceOf(ConstantScoreQuery.class));
ConstantScoreQuery csq = (ConstantScoreQuery) parsedQuery;
assertThat(csq.getFilter(), instanceOf(RecursivePrefixTreeFilter.class));
assertThat(csq.getFilter(), instanceOf(IntersectsPrefixTreeFilter.class));
}
}