Upgrade to Lucene 4.4

This commit is contained in:
Simon Willnauer 2013-07-16 15:41:23 +02:00
parent 92a7030558
commit 2e9851138e
47 changed files with 187 additions and 1847 deletions

View File

@ -30,7 +30,7 @@
</parent>
<properties>
<lucene.version>4.3.1</lucene.version>
<lucene.version>4.4.0</lucene.version>
</properties>
<repositories>

View File

@ -1,214 +0,0 @@
package org.apache.lucene.analysis.ngram;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.elasticsearch.common.lucene.Lucene;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.reverse.ReverseStringFilter;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
import org.apache.lucene.analysis.util.XCharacterUtils;
import org.apache.lucene.util.Version;
import java.io.IOException;
/**
* Tokenizes the given token into n-grams of given size(s).
* <p>
* This {@link TokenFilter} creates n-grams from the beginning edge or ending edge of an input token.
* <p><a name="version"/>As of Lucene 4.4, this filter does not support
* {@link Side#BACK} (you can use {@link ReverseStringFilter} up-front and
* afterward to get the same behavior), handles supplementary characters
* correctly and does not update offsets anymore.
*/
public final class XEdgeNGramTokenFilter extends TokenFilter {

    static {
        // LUCENE MONITOR: this should be in Lucene 4.4 copied from Revision: 1492640.
        assert Lucene.VERSION == Version.LUCENE_43 : "Elasticsearch has upgraded to Lucene Version: [" + Lucene.VERSION + "] this class should be removed";
    }

    public static final Side DEFAULT_SIDE = Side.FRONT;
    public static final int DEFAULT_MAX_GRAM_SIZE = 1;
    public static final int DEFAULT_MIN_GRAM_SIZE = 1;

    /** Identifies the edge of the input token that n-grams are cut from. */
    public static enum Side {

        /** N-grams are taken from the start of the token. */
        FRONT {
            @Override
            public String getLabel() {
                return "front";
            }
        },

        /** N-grams are taken from the end of the token. */
        @Deprecated
        BACK {
            @Override
            public String getLabel() {
                return "back";
            }
        };

        /** Returns the textual label of this side. */
        public abstract String getLabel();

        /**
         * Resolves a side from its label.
         *
         * @param sideName label to look up
         * @return the side whose label equals {@code sideName}, or {@code null} if none does
         */
        public static Side getSide(String sideName) {
            for (Side candidate : values()) {
                if (candidate.getLabel().equals(sideName)) {
                    return candidate;
                }
            }
            return null;
        }
    }

    private final XCharacterUtils charUtils;
    private final int minGram;
    private final int maxGram;
    private Side side;

    // State captured from the input token that is currently being expanded;
    // a null buffer means the next input token has to be fetched.
    private char[] curTermBuffer;
    private int curTermLength;
    private int curCodePointCount;
    private int curGramSize;
    private int tokStart;
    private int tokEnd;
    // Position increment still owed to the next emitted gram; it accumulates
    // over input tokens that produced no gram at all.
    private int savePosIncr;
    private int savePosLen;

    private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
    private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
    private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
    private final PositionLengthAttribute posLenAtt = addAttribute(PositionLengthAttribute.class);

    /**
     * Creates XEdgeNGramTokenFilter that can generate n-grams in the sizes of the given range
     *
     * @param version the <a href="#version">Lucene match version</a>
     * @param input {@link TokenStream} holding the input to be tokenized
     * @param side the {@link Side} from which to chop off an n-gram
     * @param minGram the smallest n-gram to generate
     * @param maxGram the largest n-gram to generate
     * @throws IllegalArgumentException if {@code version} or {@code side} is null,
     *         {@code side} is {@link Side#BACK}, {@code minGram} is below one, or
     *         {@code minGram} exceeds {@code maxGram}
     */
    @Deprecated
    public XEdgeNGramTokenFilter(Version version, TokenStream input, Side side, int minGram, int maxGram) {
        super(input);

        if (version == null) {
            throw new IllegalArgumentException("version must not be null");
        }

        if (side == Side.BACK) {
            throw new IllegalArgumentException("Side.BACK is not supported anymore as of Lucene 4.4, use ReverseStringFilter up-front and afterward");
        }

        if (side == null) {
            throw new IllegalArgumentException("sideLabel must be either front or back");
        }

        if (minGram < 1) {
            throw new IllegalArgumentException("minGram must be greater than zero");
        }

        if (minGram > maxGram) {
            throw new IllegalArgumentException("minGram must not be greater than maxGram");
        }

        this.charUtils = XCharacterUtils.getInstance(version);
        this.minGram = minGram;
        this.maxGram = maxGram;
        this.side = side;
    }

    /**
     * Creates XEdgeNGramTokenFilter that can generate n-grams in the sizes of the given range
     *
     * @param version the <a href="#version">Lucene match version</a>
     * @param input {@link TokenStream} holding the input to be tokenized
     * @param sideLabel the name of the {@link Side} from which to chop off an n-gram
     * @param minGram the smallest n-gram to generate
     * @param maxGram the largest n-gram to generate
     */
    @Deprecated
    public XEdgeNGramTokenFilter(Version version, TokenStream input, String sideLabel, int minGram, int maxGram) {
        this(version, input, Side.getSide(sideLabel), minGram, maxGram);
    }

    /**
     * Creates XEdgeNGramTokenFilter that can generate n-grams in the sizes of the given range,
     * always cutting from {@link Side#FRONT}.
     *
     * @param version the <a href="#version">Lucene match version</a>
     * @param input {@link TokenStream} holding the input to be tokenized
     * @param minGram the smallest n-gram to generate
     * @param maxGram the largest n-gram to generate
     */
    public XEdgeNGramTokenFilter(Version version, TokenStream input, int minGram, int maxGram) {
        this(version, input, Side.FRONT, minGram, maxGram);
    }

    /**
     * Emits the next n-gram of the current input token, fetching a new input
     * token whenever the current one has no more grams to give.
     *
     * @return {@code false} once the wrapped stream has no more tokens
     */
    @Override
    public final boolean incrementToken() throws IOException {
        for (;;) {
            if (curTermBuffer == null) {
                if (!input.incrementToken()) {
                    return false;
                }
                // Snapshot the incoming token before its attributes get overwritten.
                curTermBuffer = termAtt.buffer().clone();
                curTermLength = termAtt.length();
                curCodePointCount = charUtils.codePointCount(termAtt);
                curGramSize = minGram;
                tokStart = offsetAtt.startOffset();
                tokEnd = offsetAtt.endOffset();
                savePosIncr += posIncrAtt.getPositionIncrement();
                savePosLen = posLenAtt.getPositionLength();
            }

            // Either the size range is used up or the token is too short for the next size.
            if (curGramSize > maxGram || curGramSize > curCodePointCount) {
                curTermBuffer = null;
                continue;
            }

            // The constructor rejects Side.BACK, so the non-FRONT branch is not expected to run.
            final int gramStart;
            if (side == Side.FRONT) {
                gramStart = 0;
            } else {
                gramStart = charUtils.offsetByCodePoints(curTermBuffer, 0, curTermLength, curTermLength, -curGramSize);
            }
            final int gramEnd = charUtils.offsetByCodePoints(curTermBuffer, 0, curTermLength, gramStart, curGramSize);

            clearAttributes();
            // Every gram reports the offsets of the token it was cut from.
            offsetAtt.setOffset(tokStart, tokEnd);

            // Only the first gram of a token carries the pending increment; the rest stack on it.
            final boolean firstGram = curGramSize == minGram;
            posIncrAtt.setPositionIncrement(firstGram ? savePosIncr : 0);
            if (firstGram) {
                savePosIncr = 0;
            }

            posLenAtt.setPositionLength(savePosLen);
            termAtt.copyBuffer(curTermBuffer, gramStart, gramEnd - gramStart);
            curGramSize++;
            return true;
        }
    }

    /** Resets the wrapped stream and forgets any partially expanded token. */
    @Override
    public void reset() throws IOException {
        super.reset();
        curTermBuffer = null;
        savePosIncr = 0;
    }
}

View File

@ -1,77 +0,0 @@
package org.apache.lucene.analysis.ngram;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.elasticsearch.common.lucene.Lucene;
import java.io.Reader;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.util.Version;
/**
* Tokenizes the input from an edge into n-grams of given size(s).
* <p>
* This {@link Tokenizer} creates n-grams from the beginning edge or ending edge of an input token.
* <p><a name="version" /> As of Lucene 4.4, this tokenizer<ul>
* <li>can handle <code>maxGram</code> larger than 1024 chars, but beware that this will result in increased memory usage
* <li>doesn't trim the input,
* <li>sets position increments equal to 1 instead of 1 for the first token and 0 for all other ones
* <li>doesn't support backward n-grams anymore.
* <li>supports {@link #isTokenChar(int) pre-tokenization},
* <li>correctly handles supplementary characters.
* </ul>
* <p>Although <b style="color:red">highly</b> discouraged, it is still possible
* to use the old behavior through {@link XLucene43EdgeNGramTokenizer}.
*/
public class XEdgeNGramTokenizer extends XNGramTokenizer {

    static {
        // LUCENE MONITOR: this should be in Lucene 4.4 copied from Revision: 1492640.
        assert Lucene.VERSION == Version.LUCENE_43 : "Elasticsearch has upgraded to Lucene Version: [" + Lucene.VERSION + "] this class should be removed";
    }

    public static final int DEFAULT_MAX_GRAM_SIZE = 1;
    public static final int DEFAULT_MIN_GRAM_SIZE = 1;

    /**
     * Creates XEdgeNGramTokenizer that can generate n-grams in the sizes of the given range
     *
     * @param version the <a href="#version">Lucene match version</a>
     * @param input {@link Reader} holding the input to be tokenized
     * @param minGram the smallest n-gram to generate
     * @param maxGram the largest n-gram to generate
     */
    public XEdgeNGramTokenizer(Version version, Reader input, int minGram, int maxGram) {
        // NOTE(review): the trailing 'true' is presumably the parent's edge-only switch;
        // confirm against the XNGramTokenizer constructor.
        super(version, input, minGram, maxGram, true);
    }

    /**
     * Creates XEdgeNGramTokenizer that can generate n-grams in the sizes of the given range
     *
     * @param version the <a href="#version">Lucene match version</a>
     * @param factory {@link org.apache.lucene.util.AttributeSource.AttributeFactory} to use
     * @param input {@link Reader} holding the input to be tokenized
     * @param minGram the smallest n-gram to generate
     * @param maxGram the largest n-gram to generate
     */
    public XEdgeNGramTokenizer(Version version, AttributeFactory factory, Reader input, int minGram, int maxGram) {
        // NOTE(review): same trailing 'true' as the other constructor; confirm its meaning
        // against the XNGramTokenizer constructor.
        super(version, factory, input, minGram, maxGram, true);
    }
}

View File

@ -1,281 +0,0 @@
package org.apache.lucene.analysis.ngram;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.elasticsearch.common.lucene.Lucene;
import java.io.IOException;
import java.io.Reader;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Version;
/**
* Old version of {@link EdgeNGramTokenizer} which doesn't handle
* supplementary characters correctly.
*/
@Deprecated
public final class XLucene43EdgeNGramTokenizer extends Tokenizer {

    static {
        // LUCENE MONITOR: this should be in Lucene 4.4 copied from Revision: 1492640.
        assert Lucene.VERSION == Version.LUCENE_43 : "Elasticsearch has upgraded to Lucene Version: [" + Lucene.VERSION + "] this class should be removed";
    }

    public static final Side DEFAULT_SIDE = Side.FRONT;
    public static final int DEFAULT_MAX_GRAM_SIZE = 1;
    public static final int DEFAULT_MIN_GRAM_SIZE = 1;

    private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
    private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
    private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);

    /** Identifies the edge of the input that n-grams are cut from. */
    public static enum Side {

        /** N-grams are taken from the start of the input. */
        FRONT {
            @Override
            public String getLabel() {
                return "front";
            }
        },

        /** N-grams are taken from the end of the input. */
        BACK {
            @Override
            public String getLabel() {
                return "back";
            }
        };

        /** Returns the textual label of this side. */
        public abstract String getLabel();

        /**
         * Resolves a side from its label.
         *
         * @param sideName label to look up
         * @return the side whose label equals {@code sideName}, or {@code null} if none does
         */
        public static Side getSide(String sideName) {
            for (Side candidate : values()) {
                if (candidate.getLabel().equals(sideName)) {
                    return candidate;
                }
            }
            return null;
        }
    }

    private int minGram;
    private int maxGram;
    private int gramSize;
    private Side side;
    private boolean started;
    private int inLen; // length of the trimmed input held in inStr
    private int charsRead; // number of chars consumed from the reader, including drained ones
    private String inStr;

    /**
     * Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given range
     *
     * @param version the <a href="#version">Lucene match version</a>
     * @param input {@link Reader} holding the input to be tokenized
     * @param side the {@link Side} from which to chop off an n-gram
     * @param minGram the smallest n-gram to generate
     * @param maxGram the largest n-gram to generate
     */
    @Deprecated
    public XLucene43EdgeNGramTokenizer(Version version, Reader input, Side side, int minGram, int maxGram) {
        super(input);
        init(version, side, minGram, maxGram);
    }

    /**
     * Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given range
     *
     * @param version the <a href="#version">Lucene match version</a>
     * @param factory {@link org.apache.lucene.util.AttributeSource.AttributeFactory} to use
     * @param input {@link Reader} holding the input to be tokenized
     * @param side the {@link Side} from which to chop off an n-gram
     * @param minGram the smallest n-gram to generate
     * @param maxGram the largest n-gram to generate
     */
    @Deprecated
    public XLucene43EdgeNGramTokenizer(Version version, AttributeFactory factory, Reader input, Side side, int minGram, int maxGram) {
        super(factory, input);
        init(version, side, minGram, maxGram);
    }

    /**
     * Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given range
     *
     * @param version the <a href="#version">Lucene match version</a>
     * @param input {@link Reader} holding the input to be tokenized
     * @param sideLabel the name of the {@link Side} from which to chop off an n-gram
     * @param minGram the smallest n-gram to generate
     * @param maxGram the largest n-gram to generate
     */
    @Deprecated
    public XLucene43EdgeNGramTokenizer(Version version, Reader input, String sideLabel, int minGram, int maxGram) {
        this(version, input, Side.getSide(sideLabel), minGram, maxGram);
    }

    /**
     * Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given range
     *
     * @param version the <a href="#version">Lucene match version</a>
     * @param factory {@link org.apache.lucene.util.AttributeSource.AttributeFactory} to use
     * @param input {@link Reader} holding the input to be tokenized
     * @param sideLabel the name of the {@link Side} from which to chop off an n-gram
     * @param minGram the smallest n-gram to generate
     * @param maxGram the largest n-gram to generate
     */
    @Deprecated
    public XLucene43EdgeNGramTokenizer(Version version, AttributeFactory factory, Reader input, String sideLabel, int minGram, int maxGram) {
        this(version, factory, input, Side.getSide(sideLabel), minGram, maxGram);
    }

    /**
     * Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given range,
     * cutting from {@link Side#FRONT}.
     *
     * @param version the <a href="#version">Lucene match version</a>
     * @param input {@link Reader} holding the input to be tokenized
     * @param minGram the smallest n-gram to generate
     * @param maxGram the largest n-gram to generate
     */
    public XLucene43EdgeNGramTokenizer(Version version, Reader input, int minGram, int maxGram) {
        this(version, input, Side.FRONT, minGram, maxGram);
    }

    /**
     * Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given range,
     * cutting from {@link Side#FRONT}.
     *
     * @param version the <a href="#version">Lucene match version</a>
     * @param factory {@link org.apache.lucene.util.AttributeSource.AttributeFactory} to use
     * @param input {@link Reader} holding the input to be tokenized
     * @param minGram the smallest n-gram to generate
     * @param maxGram the largest n-gram to generate
     */
    public XLucene43EdgeNGramTokenizer(Version version, AttributeFactory factory, Reader input, int minGram, int maxGram) {
        this(version, factory, input, Side.FRONT, minGram, maxGram);
    }

    /**
     * Validates the constructor arguments and stores the configuration.
     * The stored maximum gram size is capped at 1024.
     */
    private void init(Version version, Side side, int minGram, int maxGram) {
        if (version == null) {
            throw new IllegalArgumentException("version must not be null");
        }

        if (side == null) {
            throw new IllegalArgumentException("sideLabel must be either front or back");
        }

        if (minGram < 1) {
            throw new IllegalArgumentException("minGram must be greater than zero");
        }

        if (minGram > maxGram) {
            throw new IllegalArgumentException("minGram must not be greater than maxGram");
        }

        this.minGram = minGram;
        this.maxGram = Math.min(maxGram, 1024);
        this.side = side;
    }

    /**
     * Advances to the next edge n-gram. The first call reads the input; the
     * first gram gets a position increment of 1 and every later one gets 0.
     *
     * @return {@code true} if a gram was produced, {@code false} when the
     *         input is empty after trimming or no further gram size fits
     */
    @Override
    public boolean incrementToken() throws IOException {
        clearAttributes();

        if (started) {
            posIncrAtt.setPositionIncrement(0);
        } else {
            started = true;
            gramSize = minGram;
            readInput();
            if (inLen == 0) {
                return false;
            }
            posIncrAtt.setPositionIncrement(1);
        }

        // Stop when the input is too short for this size or the size range is used up.
        if (gramSize > inLen || gramSize > maxGram) {
            return false;
        }

        final int gramStart = (side == Side.FRONT) ? 0 : inLen - gramSize;
        final int gramEnd = gramStart + gramSize;
        termAtt.setEmpty().append(inStr, gramStart, gramEnd);
        offsetAtt.setOffset(correctOffset(gramStart), correctOffset(gramEnd));
        gramSize++;
        return true;
    }

    /**
     * Reads up to the relevant number of chars into {@link #inStr} (trimmed),
     * then drains the rest of the reader so that {@link #charsRead} counts
     * everything consumed, and finally records the trimmed length.
     */
    private void readInput() throws IOException {
        // For FRONT only the first maxGram chars can matter; otherwise read up to 1024.
        final int limit = (side == Side.FRONT) ? maxGram : 1024;
        char[] buffer = new char[Math.min(1024, limit)];
        charsRead = 0;

        // TODO: refactor to a shared readFully somewhere:
        boolean reachedEof = false;
        while (charsRead < limit) {
            final int count = input.read(buffer, charsRead, buffer.length - charsRead);
            if (count == -1) {
                reachedEof = true;
                break;
            }
            charsRead += count;
            if (charsRead == buffer.length && charsRead < limit) {
                buffer = ArrayUtil.grow(buffer);
            }
        }

        inStr = new String(buffer, 0, charsRead).trim();

        if (!reachedEof) {
            // Read extra throwaway chars so that on end() we
            // report the correct offset:
            final char[] discard = new char[1024];
            for (int count = input.read(discard, 0, discard.length);
                 count != -1;
                 count = input.read(discard, 0, discard.length)) {
                charsRead += count;
            }
        }

        inLen = inStr.length();
    }

    /** Sets both final offsets to the corrected count of chars consumed from the reader. */
    @Override
    public void end() {
        final int finalOffset = correctOffset(charsRead);
        this.offsetAtt.setOffset(finalOffset, finalOffset);
    }

    /** Resets the tokenizer so that the next {@link #incrementToken()} reads the input afresh. */
    @Override
    public void reset() throws IOException {
        super.reset();
        started = false;
    }
}

View File

@ -1,164 +0,0 @@
package org.apache.lucene.analysis.ngram;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.util.Version;
import org.elasticsearch.common.lucene.Lucene;
import java.io.IOException;
import java.io.Reader;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
/**
* Old broken version of {@link NGramTokenizer}.
*/
@Deprecated
public final class XLucene43NGramTokenizer extends Tokenizer {

    static {
        // LUCENE MONITOR: this should be in Lucene 4.4 copied from Revision: 1492640.
        assert Lucene.VERSION == Version.LUCENE_43 : "Elasticsearch has upgraded to Lucene Version: [" + Lucene.VERSION + "] this class should be removed";
    }

    public static final int DEFAULT_MIN_NGRAM_SIZE = 1;
    public static final int DEFAULT_MAX_NGRAM_SIZE = 2;

    private int minGram, maxGram;
    private int gramSize;
    private int pos;
    private int inLen; // length of the trimmed input held in inStr
    private int charsRead; // number of chars consumed from the reader, including drained ones
    private String inStr;
    private boolean started;

    private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
    private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);

    /**
     * Creates NGramTokenizer with given min and max n-grams.
     * @param input {@link Reader} holding the input to be tokenized
     * @param minGram the smallest n-gram to generate
     * @param maxGram the largest n-gram to generate
     */
    public XLucene43NGramTokenizer(Reader input, int minGram, int maxGram) {
        super(input);
        init(minGram, maxGram);
    }

    /**
     * Creates NGramTokenizer with given min and max n-grams.
     * @param factory {@link org.apache.lucene.util.AttributeSource.AttributeFactory} to use
     * @param input {@link Reader} holding the input to be tokenized
     * @param minGram the smallest n-gram to generate
     * @param maxGram the largest n-gram to generate
     */
    public XLucene43NGramTokenizer(AttributeFactory factory, Reader input, int minGram, int maxGram) {
        super(factory, input);
        init(minGram, maxGram);
    }

    /**
     * Creates NGramTokenizer with default min and max n-grams.
     * @param input {@link Reader} holding the input to be tokenized
     */
    public XLucene43NGramTokenizer(Reader input) {
        this(input, DEFAULT_MIN_NGRAM_SIZE, DEFAULT_MAX_NGRAM_SIZE);
    }

    /** Validates the gram size range and stores it. */
    private void init(int minGram, int maxGram) {
        if (minGram < 1) {
            throw new IllegalArgumentException("minGram must be greater than zero");
        }

        if (minGram > maxGram) {
            throw new IllegalArgumentException("minGram must not be greater than maxGram");
        }

        this.minGram = minGram;
        this.maxGram = maxGram;
    }

    /**
     * Advances to the next n-gram. All grams of one size are produced, left to
     * right, before the size is increased. The first call reads the input.
     *
     * @return {@code true} if a gram was produced, {@code false} when the
     *         input is empty after trimming or no further gram size fits
     */
    @Override
    public boolean incrementToken() throws IOException {
        clearAttributes();

        if (!started) {
            started = true;
            gramSize = minGram;
            readInput();
            if (inLen == 0) {
                return false;
            }
        }

        if (pos + gramSize > inLen) {
            // Ran off the end for this size: restart at the beginning with the next size.
            pos = 0;
            gramSize++;
            if (gramSize > maxGram || gramSize > inLen) {
                return false;
            }
        }

        final int gramStart = pos++;
        final int gramEnd = gramStart + gramSize;
        termAtt.setEmpty().append(inStr, gramStart, gramEnd);
        offsetAtt.setOffset(correctOffset(gramStart), correctOffset(gramEnd));
        return true;
    }

    /**
     * Reads at most 1024 chars into {@link #inStr} (trimmed), drains whatever
     * is left in the reader when the buffer was filled so that
     * {@link #charsRead} counts everything consumed, and records the trimmed length.
     */
    private void readInput() throws IOException {
        final char[] buffer = new char[1024];
        charsRead = 0;

        // TODO: refactor to a shared readFully somewhere:
        while (charsRead < buffer.length) {
            final int count = input.read(buffer, charsRead, buffer.length - charsRead);
            if (count == -1) {
                break;
            }
            charsRead += count;
        }

        inStr = new String(buffer, 0, charsRead).trim();

        if (charsRead == buffer.length) {
            // Read extra throwaway chars so that on end() we
            // report the correct offset:
            final char[] discard = new char[1024];
            for (int count = input.read(discard, 0, discard.length);
                 count != -1;
                 count = input.read(discard, 0, discard.length)) {
                charsRead += count;
            }
        }

        inLen = inStr.length();
    }

    /** Sets both final offsets to the corrected count of chars consumed from the reader. */
    @Override
    public void end() {
        final int finalOffset = correctOffset(charsRead);
        this.offsetAtt.setOffset(finalOffset, finalOffset);
    }

    /** Resets the tokenizer so that the next {@link #incrementToken()} reads the input afresh. */
    @Override
    public void reset() throws IOException {
        super.reset();
        started = false;
        pos = 0;
    }
}

View File

@ -1,158 +0,0 @@
package org.apache.lucene.analysis.ngram;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.elasticsearch.common.lucene.Lucene;
import java.io.IOException;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.miscellaneous.LengthFilter;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
import org.apache.lucene.analysis.util.XCharacterUtils;
import org.apache.lucene.util.Version;
/**
* Tokenizes the input into n-grams of the given size(s).
* <a name="version"/>
* <p>You must specify the required {@link Version} compatibility when
* creating a {@link XNGramTokenFilter}. As of Lucene 4.4, this token filter:<ul>
* <li>handles supplementary characters correctly,</li>
* <li>emits all n-grams for the same token at the same position,</li>
* <li>does not modify offsets,</li>
* <li>sorts n-grams by their offset in the original token first, then
* increasing length (meaning that "abc" will give "a", "ab", "abc", "b", "bc",
* "c").</li></ul>
* <p>You can make this filter use the old behavior by providing a version &lt;
* {@link Version#LUCENE_44} in the constructor but this is not recommended as
* it will lead to broken {@link TokenStream}s that will cause highlighting
* bugs.
* <p>If you were using this {@link TokenFilter} to perform partial highlighting,
* this won't work anymore since this filter doesn't update offsets. You should
* modify your analysis chain to use {@link NGramTokenizer}, and potentially
* override {@link NGramTokenizer#isTokenChar(int)} to perform pre-tokenization.
*/
public final class XNGramTokenFilter extends TokenFilter {

    static {
        // LUCENE MONITOR: this should be in Lucene 4.4 copied from Revision: 1492640.
        assert Lucene.VERSION == Version.LUCENE_43 : "Elasticsearch has upgraded to Lucene Version: [" + Lucene.VERSION + "] this class should be removed";
    }

    public static final int DEFAULT_MIN_NGRAM_SIZE = 1;
    public static final int DEFAULT_MAX_NGRAM_SIZE = 2;

    private final int minGram, maxGram;

    // State captured from the input token that is currently being expanded;
    // a null buffer means the next input token has to be fetched.
    private char[] curTermBuffer;
    private int curTermLength;
    private int curCodePointCount;
    private int curGramSize;
    private int curPos;
    private int curPosInc, curPosLen;
    private int tokStart;
    private int tokEnd;

    private final XCharacterUtils charUtils;

    private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
    private final PositionIncrementAttribute posIncAtt;
    private final PositionLengthAttribute posLenAtt;
    private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);

    /**
     * Creates XNGramTokenFilter with given min and max n-grams. The input is
     * wrapped in a {@link LengthFilter} whose minimum length is {@code minGram}.
     *
     * @param version Lucene version to enable correct position increments.
     *                See <a href="#version">above</a> for details.
     * @param input {@link TokenStream} holding the input to be tokenized
     * @param minGram the smallest n-gram to generate
     * @param maxGram the largest n-gram to generate
     * @throws IllegalArgumentException if {@code minGram} is below one or exceeds {@code maxGram}
     */
    public XNGramTokenFilter(Version version, TokenStream input, int minGram, int maxGram) {
        super(new LengthFilter(true, input, minGram, Integer.MAX_VALUE));
        this.charUtils = XCharacterUtils.getInstance(version);

        if (minGram < 1) {
            throw new IllegalArgumentException("minGram must be greater than zero");
        }

        if (minGram > maxGram) {
            throw new IllegalArgumentException("minGram must not be greater than maxGram");
        }

        this.minGram = minGram;
        this.maxGram = maxGram;
        this.posIncAtt = addAttribute(PositionIncrementAttribute.class);
        this.posLenAtt = addAttribute(PositionLengthAttribute.class);
    }

    /**
     * Creates XNGramTokenFilter with default min and max n-grams.
     *
     * @param version Lucene version to enable correct position increments.
     *                See <a href="#version">above</a> for details.
     * @param input {@link TokenStream} holding the input to be tokenized
     */
    public XNGramTokenFilter(Version version, TokenStream input) {
        this(version, input, DEFAULT_MIN_NGRAM_SIZE, DEFAULT_MAX_NGRAM_SIZE);
    }

    /**
     * Emits the next n-gram of the current input token: for each start
     * position, grams of increasing size; then the next start position.
     * A new input token is fetched once the current one is used up.
     *
     * @return {@code false} once the wrapped stream has no more tokens
     */
    @Override
    public final boolean incrementToken() throws IOException {
        for (;;) {
            if (curTermBuffer == null) {
                if (!input.incrementToken()) {
                    return false;
                }
                // Snapshot the incoming token before its attributes get overwritten.
                curTermBuffer = termAtt.buffer().clone();
                curTermLength = termAtt.length();
                curCodePointCount = charUtils.codePointCount(termAtt);
                curGramSize = minGram;
                curPos = 0;
                curPosInc = posIncAtt.getPositionIncrement();
                curPosLen = posLenAtt.getPositionLength();
                tokStart = offsetAtt.startOffset();
                tokEnd = offsetAtt.endOffset();
            }

            // Sizes for this start position are used up: move one code point right.
            if (curGramSize > maxGram || (curPos + curGramSize) > curCodePointCount) {
                ++curPos;
                curGramSize = minGram;
            }

            // Even the smallest gram no longer fits: this token is finished.
            if ((curPos + curGramSize) > curCodePointCount) {
                curTermBuffer = null;
                continue;
            }

            clearAttributes();
            final int gramStart = charUtils.offsetByCodePoints(curTermBuffer, 0, curTermLength, 0, curPos);
            final int gramEnd = charUtils.offsetByCodePoints(curTermBuffer, 0, curTermLength, gramStart, curGramSize);
            termAtt.copyBuffer(curTermBuffer, gramStart, gramEnd - gramStart);
            // The token's increment goes to its first gram only; later grams get 0.
            posIncAtt.setPositionIncrement(curPosInc);
            curPosInc = 0;
            posLenAtt.setPositionLength(curPosLen);
            // Every gram reports the offsets of the token it was cut from.
            offsetAtt.setOffset(tokStart, tokEnd);
            curGramSize++;
            return true;
        }
    }

    /** Resets the wrapped stream and forgets any partially expanded token. */
    @Override
    public void reset() throws IOException {
        super.reset();
        curTermBuffer = null;
    }
}

View File

@ -1,248 +0,0 @@
package org.apache.lucene.analysis.ngram;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.elasticsearch.common.lucene.Lucene;
import java.io.IOException;
import java.io.Reader;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
import org.apache.lucene.analysis.util.XCharacterUtils;
import org.apache.lucene.util.Version;
/**
* Tokenizes the input into n-grams of the given size(s).
* <p>Unlike {@link NGramTokenFilter}, this class sets offsets so
* that characters between startOffset and endOffset in the original stream are
* the same as the term chars.
* <p>For example, "abcde" would be tokenized as (minGram=2, maxGram=3):
* <table>
* <tr><th>Term</th><td>ab</td><td>abc</td><td>bc</td><td>bcd</td><td>cd</td><td>cde</td><td>de</td></tr>
* <tr><th>Position increment</th><td>1</td><td>1</td><td>1</td><td>1</td><td>1</td><td>1</td><td>1</td></tr>
* <tr><th>Position length</th><td>1</td><td>1</td><td>1</td><td>1</td><td>1</td><td>1</td><td>1</td></tr>
* <tr><th>Offsets</th><td>[0,2[</td><td>[0,3[</td><td>[1,3[</td><td>[1,4[</td><td>[2,4[</td><td>[2,5[</td><td>[3,5[</td></tr>
* </table>
* <a name="version"/>
* <p>This tokenizer changed a lot in Lucene 4.4 in order to:<ul>
* <li>tokenize in a streaming fashion to support streams which are larger
* than 1024 chars (limit of the previous version),
* <li>count grams based on unicode code points instead of java chars (and
* never split in the middle of surrogate pairs),
* <li>give the ability to {@link #isTokenChar(int) pre-tokenize} the stream
* before computing n-grams.</ul>
* <p>Additionally, this class doesn't trim trailing whitespace, and it emits
* tokens in a different order: tokens are now emitted by increasing start
* offsets, whereas they used to be emitted by increasing lengths (which made
* it impossible to support large input streams).
* <p>Although <b style="color:red">highly</b> discouraged, it is still possible
* to use the old behavior through {@link Lucene43NGramTokenizer}.
*/
// non-final to allow for overriding isTokenChar, but all other methods should be final
public class XNGramTokenizer extends Tokenizer {

  static {
    // LUCENE MONITOR: this should be in Lucene 4.4 copied from Revision: 1492640.
    assert Lucene.VERSION == Version.LUCENE_43 : "Elasticsearch has upgraded to Lucene Version: [" + Lucene.VERSION + "] this class should be removed";
  }

  /** Default value for the smallest n-gram to generate. */
  public static final int DEFAULT_MIN_NGRAM_SIZE = 1;
  /** Default value for the largest n-gram to generate. */
  public static final int DEFAULT_MAX_NGRAM_SIZE = 2;

  private XCharacterUtils charUtils;
  private XCharacterUtils.CharacterBuffer charBuffer; // raw chars read from the Reader
  private int[] buffer; // like charBuffer, but converted to code points
  private int bufferStart, bufferEnd; // remaining slice in buffer
  private int offset; // offset, in java chars, of buffer[bufferStart] in the input
  private int gramSize; // size, in code points, of the next gram to emit
  private int minGram, maxGram;
  private boolean exhausted; // set from the result of the last fill() of charBuffer
  private int lastCheckedChar; // last offset in the buffer that we checked
  private int lastNonTokenChar; // last offset that we found to not be a token char
  private boolean edgesOnly; // leading edges n-grams only

  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
  private final PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
  private final PositionLengthAttribute posLenAtt = addAttribute(PositionLengthAttribute.class);
  private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);

  /**
   * Package-private constructor that additionally allows restricting the
   * output to leading-edge n-grams.
   * @param version the lucene compatibility <a href="#version">version</a>
   * @param input {@link Reader} holding the input to be tokenized
   * @param minGram the smallest n-gram to generate
   * @param maxGram the largest n-gram to generate
   * @param edgesOnly whether only leading-edge n-grams should be emitted
   * @throws IllegalArgumentException if the version or the gram sizes are rejected by {@code init}
   */
  XNGramTokenizer(Version version, Reader input, int minGram, int maxGram, boolean edgesOnly) {
    super(input);
    init(version, minGram, maxGram, edgesOnly);
  }

  /**
   * Creates NGramTokenizer with given min and max n-grams.
   * @param version the lucene compatibility <a href="#version">version</a>
   * @param input {@link Reader} holding the input to be tokenized
   * @param minGram the smallest n-gram to generate
   * @param maxGram the largest n-gram to generate
   * @throws IllegalArgumentException if the version or the gram sizes are rejected by {@code init}
   */
  public XNGramTokenizer(Version version, Reader input, int minGram, int maxGram) {
    this(version, input, minGram, maxGram, false);
  }

  /**
   * Package-private constructor taking an attribute factory that additionally
   * allows restricting the output to leading-edge n-grams.
   * @param version the lucene compatibility <a href="#version">version</a>
   * @param factory {@link org.apache.lucene.util.AttributeSource.AttributeFactory} to use
   * @param input {@link Reader} holding the input to be tokenized
   * @param minGram the smallest n-gram to generate
   * @param maxGram the largest n-gram to generate
   * @param edgesOnly whether only leading-edge n-grams should be emitted
   * @throws IllegalArgumentException if the version or the gram sizes are rejected by {@code init}
   */
  XNGramTokenizer(Version version, AttributeFactory factory, Reader input, int minGram, int maxGram, boolean edgesOnly) {
    super(factory, input);
    init(version, minGram, maxGram, edgesOnly);
  }

  /**
   * Creates NGramTokenizer with given min and max n-grams.
   * @param version the lucene compatibility <a href="#version">version</a>
   * @param factory {@link org.apache.lucene.util.AttributeSource.AttributeFactory} to use
   * @param input {@link Reader} holding the input to be tokenized
   * @param minGram the smallest n-gram to generate
   * @param maxGram the largest n-gram to generate
   * @throws IllegalArgumentException if the version or the gram sizes are rejected by {@code init}
   */
  public XNGramTokenizer(Version version, AttributeFactory factory, Reader input, int minGram, int maxGram) {
    this(version, factory, input, minGram, maxGram, false);
  }

  /**
   * Creates NGramTokenizer with default min and max n-grams.
   * @param version the lucene compatibility <a href="#version">version</a>
   * @param input {@link Reader} holding the input to be tokenized
   */
  public XNGramTokenizer(Version version, Reader input) {
    this(version, input, DEFAULT_MIN_NGRAM_SIZE, DEFAULT_MAX_NGRAM_SIZE);
  }

  /**
   * Validates the arguments and allocates the buffers shared by all constructors.
   * @throws IllegalArgumentException if {@code version} is older than the
   *         supported one, if {@code minGram < 1} or if {@code minGram > maxGram}
   */
  private void init(Version version, int minGram, int maxGram, boolean edgesOnly) {
    if (!version.onOrAfter(Version.LUCENE_43)) {
      throw new IllegalArgumentException("This class only works with Lucene 4.4+. To emulate the old (broken) behavior of NGramTokenizer, use Lucene43NGramTokenizer/Lucene43EdgeNGramTokenizer");
    }
    // every older version was rejected just above, so the version-aware
    // instance is the only one that can ever be selected here
    charUtils = XCharacterUtils.getInstance(version);
    if (minGram < 1) {
      throw new IllegalArgumentException("minGram must be greater than zero");
    }
    if (minGram > maxGram) {
      throw new IllegalArgumentException("minGram must not be greater than maxGram");
    }
    this.minGram = minGram;
    this.maxGram = maxGram;
    this.edgesOnly = edgesOnly;
    charBuffer = XCharacterUtils.newCharacterBuffer(2 * maxGram + 1024); // 2 * maxGram in case all code points require 2 chars and + 1024 for buffering to not keep polling the Reader
    buffer = new int[charBuffer.getBuffer().length];
    // Make the term att large enough
    termAtt.resizeBuffer(2 * maxGram);
  }

  /**
   * Emits the next n-gram, if any.
   * <p>Grams are produced by increasing start position and, for a given start
   * position, by increasing size from {@code minGram} to {@code maxGram}.
   * Candidates containing a code point rejected by {@link #isTokenChar(int)}
   * are skipped.
   * @return {@code true} if a gram was written to the attributes,
   *         {@code false} once no further gram can be produced
   */
  @Override
  public final boolean incrementToken() throws IOException {
    clearAttributes();

    // termination of this loop is guaranteed by the fact that every iteration
    // either advances the buffer (calls consume()) or increases gramSize
    while (true) {
      // compact: runs when at most maxGram + 1 code points remain in the slice
      // and the last fill did not report exhaustion
      if (bufferStart >= bufferEnd - maxGram - 1 && !exhausted) {
        System.arraycopy(buffer, bufferStart, buffer, 0, bufferEnd - bufferStart);
        bufferEnd -= bufferStart;
        // these two are indices into buffer, so they shift with the data
        lastCheckedChar -= bufferStart;
        lastNonTokenChar -= bufferStart;
        bufferStart = 0;

        // fill in remaining space
        exhausted = !charUtils.fill(charBuffer, input, buffer.length - bufferEnd);
        // convert to code points
        bufferEnd += charUtils.toCodePoints(charBuffer.getBuffer(), 0, charBuffer.getLength(), buffer, bufferEnd);
      }

      // should we go to the next offset?
      if (gramSize > maxGram || (bufferStart + gramSize) > bufferEnd) {
        // stop when not even a minGram-sized gram would fit after advancing by one
        if (bufferStart + 1 + minGram > bufferEnd) {
          assert exhausted;
          return false;
        }
        consume();
        gramSize = minGram;
      }

      updateLastNonTokenChar();

      // retry if the token to be emitted was going to not only contain token chars
      final boolean termContainsNonTokenChar = lastNonTokenChar >= bufferStart && lastNonTokenChar < (bufferStart + gramSize);
      final boolean isEdgeAndPreviousCharIsTokenChar = edgesOnly && lastNonTokenChar != bufferStart - 1;
      if (termContainsNonTokenChar || isEdgeAndPreviousCharIsTokenChar) {
        consume();
        gramSize = minGram;
        continue;
      }

      // length is the number of java chars written, which is what offsets are expressed in
      final int length = charUtils.toChars(buffer, bufferStart, gramSize, termAtt.buffer(), 0);
      termAtt.setLength(length);
      posIncAtt.setPositionIncrement(1);
      posLenAtt.setPositionLength(1);
      offsetAtt.setOffset(correctOffset(offset), correctOffset(offset + length));
      ++gramSize;
      return true;
    }
  }

  /**
   * Checks the code points between {@code lastCheckedChar} (exclusive) and the
   * end of the current candidate gram (inclusive), scanning backwards, and
   * records the first one found that is not a token char.
   */
  private void updateLastNonTokenChar() {
    final int termEnd = bufferStart + gramSize - 1;
    if (termEnd > lastCheckedChar) {
      for (int i = termEnd; i > lastCheckedChar; --i) {
        if (!isTokenChar(buffer[i])) {
          lastNonTokenChar = i;
          break;
        }
      }
      lastCheckedChar = termEnd;
    }
  }

  /** Consume one code point. */
  private void consume() {
    // offset is tracked in java chars while the buffer holds code points
    offset += Character.charCount(buffer[bufferStart++]);
  }

  /** Only collect characters which satisfy this condition. */
  protected boolean isTokenChar(int chr) {
    return true;
  }

  /**
   * Sets the final offset: the current offset plus the char count of every
   * code point still in the buffer slice, passed through {@code correctOffset}.
   */
  @Override
  public final void end() {
    assert bufferStart <= bufferEnd;
    int endOffset = offset;
    for (int i = bufferStart; i < bufferEnd; ++i) {
      endOffset += Character.charCount(buffer[i]);
    }
    endOffset = correctOffset(endOffset);
    offsetAtt.setOffset(endOffset, endOffset);
  }

  /**
   * Resets the tokenizer state so that the next call to
   * {@link #incrementToken()} starts from an empty buffer.
   */
  @Override
  public final void reset() throws IOException {
    super.reset();
    // an empty slice at the very end of the buffer satisfies the compaction
    // condition, so the first incrementToken() call refills from the Reader
    bufferStart = bufferEnd = buffer.length;
    lastNonTokenChar = lastCheckedChar = bufferStart - 1;
    offset = 0;
    gramSize = minGram;
    exhausted = false;
    charBuffer.reset();
  }
}

View File

@ -20,7 +20,7 @@ import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.*;
import org.apache.lucene.search.vectorhighlight.XFieldTermStack.TermInfo;
import org.apache.lucene.util.SorterTemplate;
import org.apache.lucene.util.InPlaceMergeSorter;
import java.io.IOException;
import java.util.*;
@ -364,34 +364,24 @@ public class XFieldQuery {
PhraseQuery pq = (PhraseQuery)query;
final Term[] terms = pq.getTerms();
final int[] positions = pq.getPositions();
new SorterTemplate() {
new InPlaceMergeSorter() {
@Override
protected void swap(int i, int j) {
Term tmpTerm = terms[i];
terms[i] = terms[j];
terms[j] = tmpTerm;
@Override
protected void swap(int i, int j) {
Term tmpTerm = terms[i];
terms[i] = terms[j];
terms[j] = tmpTerm;
int tmpPos = positions[i];
positions[i] = positions[j];
positions[j] = tmpPos;
}
int tmpPos = positions[i];
positions[i] = positions[j];
positions[j] = tmpPos;
}
@Override
protected int compare(int i, int j) {
return positions[i] - positions[j];
}
@Override
protected void setPivot(int i) {
throw new UnsupportedOperationException();
}
@Override
protected int comparePivot(int j) {
throw new UnsupportedOperationException();
}
}.mergeSort(0, terms.length - 1);
@Override
protected int compare(int i, int j) {
return positions[i] - positions[j];
}
}.sort(0, terms.length);
addToMap(pq, terms, positions, 0, subMap, pq.getSlop());
}

View File

@ -1,171 +0,0 @@
package org.apache.lucene.util.packed;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import static org.apache.lucene.util.packed.XPackedInts.checkBlockSize;
import static org.apache.lucene.util.packed.XPackedInts.numBlocks;
import org.apache.lucene.util.Version;
import org.elasticsearch.common.lucene.Lucene;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.RamUsageEstimator;
/**
* Base implementation for {@link XPagedMutable} and {@link XPagedGrowableWriter}.
* @lucene.internal
*/
abstract class XAbstractPagedMutable<T extends XAbstractPagedMutable<T>> {

  static {
    // LUCENE MONITOR: this should be in Lucene 4.4 copied from Revision: 1492640.
    assert Lucene.VERSION == Version.LUCENE_43 : "Elasticsearch has upgraded to Lucene Version: [" + Lucene.VERSION + "] this class should be removed";
  }

  /** Smallest accepted page size. */
  static final int MIN_BLOCK_SIZE = 1 << 6;
  /** Largest accepted page size. */
  static final int MAX_BLOCK_SIZE = 1 << 30;

  final long size;
  final int pageShift;
  final int pageMask;
  final PackedInts.Mutable[] subMutables;
  final int bitsPerValue;

  /**
   * Records the geometry and allocates the (still empty) array of pages;
   * the pages themselves are created by {@link #fillPages()}.
   */
  XAbstractPagedMutable(int bitsPerValue, long size, int pageSize) {
    this.bitsPerValue = bitsPerValue;
    this.size = size;
    // checkBlockSize validates pageSize and returns its base-2 logarithm
    this.pageShift = checkBlockSize(pageSize, MIN_BLOCK_SIZE, MAX_BLOCK_SIZE);
    this.pageMask = pageSize - 1;
    this.subMutables = new PackedInts.Mutable[numBlocks(size, pageSize)];
  }

  /** Allocates every page through {@link #newMutable(int, int)}. */
  protected final void fillPages() {
    final int pageCount = numBlocks(size, pageSize());
    final int lastPage = pageCount - 1;
    for (int page = 0; page < pageCount; ++page) {
      // do not allocate for more entries than necessary on the last page
      final int valueCount;
      if (page == lastPage) {
        valueCount = lastPageSize(size);
      } else {
        valueCount = pageSize();
      }
      subMutables[page] = newMutable(valueCount, bitsPerValue);
    }
  }

  /** Creates the backing storage for a single page. */
  protected abstract PackedInts.Mutable newMutable(int valueCount, int bitsPerValue);

  /** Number of values held by the last page when <code>size</code> values are stored. */
  final int lastPageSize(long size) {
    final int remainder = indexInPage(size);
    if (remainder != 0) {
      return remainder;
    }
    // size is a multiple of the page size: the last page is a full one
    return pageSize();
  }

  /** The number of values per page. */
  final int pageSize() {
    return pageMask + 1;
  }

  /** The number of values. */
  public final long size() {
    return size;
  }

  /** Index of the page that holds <code>index</code>. */
  final int pageIndex(long index) {
    return (int) (index >>> pageShift);
  }

  /** Position of <code>index</code> inside its page. */
  final int indexInPage(long index) {
    final int lowBits = (int) index;
    return lowBits & pageMask;
  }

  /** Get value at <code>index</code>. */
  public final long get(long index) {
    assert index >= 0 && index < size;
    final PackedInts.Mutable page = subMutables[pageIndex(index)];
    return page.get(indexInPage(index));
  }

  /** Set value at <code>index</code>. */
  public final void set(long index, long value) {
    assert index >= 0 && index < size;
    final PackedInts.Mutable page = subMutables[pageIndex(index)];
    page.set(indexInPage(index), value);
  }

  /** Shallow size of this object, before alignment and without the pages. */
  protected long baseRamBytesUsed() {
    final int shallowBytes = RamUsageEstimator.NUM_BYTES_OBJECT_HEADER
        + RamUsageEstimator.NUM_BYTES_OBJECT_REF
        + RamUsageEstimator.NUM_BYTES_LONG
        + 3 * RamUsageEstimator.NUM_BYTES_INT;
    return shallowBytes;
  }

  /** Return the number of bytes used by this object. */
  public long ramBytesUsed() {
    final long pageArrayBytes = RamUsageEstimator.NUM_BYTES_ARRAY_HEADER
        + (long) RamUsageEstimator.NUM_BYTES_OBJECT_REF * subMutables.length;
    long total = RamUsageEstimator.alignObjectSize(baseRamBytesUsed());
    total += RamUsageEstimator.alignObjectSize(pageArrayBytes);
    for (int page = 0; page < subMutables.length; ++page) {
      total += subMutables[page].ramBytesUsed();
    }
    return total;
  }

  /** Creates an instance of size <code>newSize</code> whose pages are not allocated yet. */
  protected abstract T newUnfilledCopy(long newSize);

  /** Create a new copy of size <code>newSize</code> based on the content of
   *  this buffer. This method is much more efficient than creating a new
   *  instance and copying values one by one. */
  public final T resize(long newSize) {
    final T resized = newUnfilledCopy(newSize);
    final int targetPages = resized.subMutables.length;
    final int sharedPages = Math.min(targetPages, subMutables.length);
    final long[] scratch = new long[1024];
    for (int page = 0; page < targetPages; ++page) {
      final boolean existsInSource = page < sharedPages;
      final int valueCount = page == targetPages - 1 ? lastPageSize(newSize) : pageSize();
      // a page that already exists keeps its current width, a brand new page
      // starts with the configured one
      final int bpv;
      if (existsInSource) {
        bpv = subMutables[page].getBitsPerValue();
      } else {
        bpv = this.bitsPerValue;
      }
      resized.subMutables[page] = newMutable(valueCount, bpv);
      if (existsInSource) {
        final int copyLength = Math.min(valueCount, subMutables[page].size());
        XPackedInts.copy(subMutables[page], 0, resized.subMutables[page], 0, copyLength, scratch);
      }
    }
    return resized;
  }

  /** Similar to {@link ArrayUtil#grow(long[], int)}. */
  public final T grow(long minSize) {
    assert minSize >= 0;
    if (minSize > size()) {
      // over-allocate by 1/8th of the requested size, and by at least 3
      final long extra = Math.max(3L, minSize >>> 3);
      return resize(minSize + extra);
    }
    @SuppressWarnings("unchecked")
    final T self = (T) this;
    return self;
  }

  /** Similar to {@link ArrayUtil#grow(long[])}. */
  public final T grow() {
    return grow(size() + 1);
  }

  @Override
  public final String toString() {
    return getClass().getSimpleName() + "(size=" + size() + ",pageSize=" + pageSize() + ")";
  }
}

View File

@ -1,162 +0,0 @@
package org.apache.lucene.util.packed;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.util.Version;
import org.elasticsearch.common.lucene.Lucene;
import java.io.IOException;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.util.RamUsageEstimator;
/**
* Implements {@link PackedInts.Mutable}, but grows the
* bit count of the underlying packed ints on-demand.
* <p>Beware that this class accepts negative values, but in order
* to store one it grows the number of bits per value to 64.
*
* <p>@lucene.internal</p>
*/
public class XGrowableWriter implements PackedInts.Mutable {

  static {
    // LUCENE MONITOR: this should be in Lucene 4.4 copied from Revision: 1492640.
    assert Lucene.VERSION == Version.LUCENE_43 : "Elasticsearch has upgraded to Lucene Version: [" + Lucene.VERSION + "] this class should be removed";
  }

  // mask of the bits representable by `current`; a value fits iff masking leaves it unchanged
  private long currentMask;
  // the packed ints currently holding the values, swapped for a wider one on demand
  private PackedInts.Mutable current;
  private final float acceptableOverheadRatio;

  /**
   * @param startBitsPerValue       the initial number of bits per value, may grow depending on the data
   * @param valueCount              the number of values
   * @param acceptableOverheadRatio an acceptable overhead ratio
   */
  public XGrowableWriter(int startBitsPerValue, int valueCount, float acceptableOverheadRatio) {
    this.acceptableOverheadRatio = acceptableOverheadRatio;
    this.current = PackedInts.getMutable(valueCount, startBitsPerValue, acceptableOverheadRatio);
    this.currentMask = mask(this.current.getBitsPerValue());
  }

  /** Mask covering every value that can be stored on <code>bitsPerValue</code> bits. */
  private static long mask(int bitsPerValue) {
    if (bitsPerValue == 64) {
      return ~0L;
    }
    return PackedInts.maxValue(bitsPerValue);
  }

  @Override
  public long get(int index) {
    return current.get(index);
  }

  @Override
  public int size() {
    return current.size();
  }

  @Override
  public int getBitsPerValue() {
    return current.getBitsPerValue();
  }

  /** Returns the packed ints that currently back this writer. */
  public PackedInts.Mutable getMutable() {
    return current;
  }

  @Override
  public Object getArray() {
    return current.getArray();
  }

  @Override
  public boolean hasArray() {
    return current.hasArray();
  }

  /**
   * Makes sure <code>value</code> can be stored, replacing the backing packed
   * ints with a wider copy when it does not fit the current mask.
   */
  private void ensureCapacity(long value) {
    final boolean fits = (value & currentMask) == value;
    if (!fits) {
      final int neededBits;
      if (value < 0) {
        // negative values need the full 64 bits
        neededBits = 64;
      } else {
        neededBits = PackedInts.bitsRequired(value);
      }
      assert neededBits > current.getBitsPerValue();
      final int count = size();
      final PackedInts.Mutable wider = PackedInts.getMutable(count, neededBits, acceptableOverheadRatio);
      PackedInts.copy(current, 0, wider, 0, count, PackedInts.DEFAULT_BUFFER_SIZE);
      current = wider;
      currentMask = mask(wider.getBitsPerValue());
    }
  }

  @Override
  public void set(int index, long value) {
    ensureCapacity(value);
    current.set(index, value);
  }

  @Override
  public void clear() {
    current.clear();
  }

  /**
   * Returns a new writer holding <code>newSize</code> values, starting with the
   * current number of bits per value and initialized with as many of the
   * current values as fit.
   */
  public XGrowableWriter resize(int newSize) {
    final XGrowableWriter resized = new XGrowableWriter(getBitsPerValue(), newSize, acceptableOverheadRatio);
    final int copied = Math.min(size(), newSize);
    PackedInts.copy(current, 0, resized, 0, copied, PackedInts.DEFAULT_BUFFER_SIZE);
    return resized;
  }

  @Override
  public int get(int index, long[] arr, int off, int len) {
    return current.get(index, arr, off, len);
  }

  @Override
  public int set(int index, long[] arr, int off, int len) {
    // bitwise or is nice because either all values are positive and the
    // or-ed result will require as many bits per value as the max of the
    // values, or one of them is negative and the result will be negative,
    // forcing GrowableWriter to use 64 bits per value
    final int end = off + len;
    long combined = 0L;
    for (int i = off; i < end; i++) {
      combined |= arr[i];
    }
    ensureCapacity(combined);
    return current.set(index, arr, off, len);
  }

  @Override
  public void fill(int fromIndex, int toIndex, long val) {
    ensureCapacity(val);
    current.fill(fromIndex, toIndex, val);
  }

  @Override
  public long ramBytesUsed() {
    final long shallow = RamUsageEstimator.alignObjectSize(
        RamUsageEstimator.NUM_BYTES_OBJECT_HEADER
        + RamUsageEstimator.NUM_BYTES_OBJECT_REF
        + RamUsageEstimator.NUM_BYTES_LONG
        + RamUsageEstimator.NUM_BYTES_FLOAT);
    return shallow + current.ramBytesUsed();
  }

  @Override
  public void save(DataOutput out) throws IOException {
    current.save(out);
  }
}

View File

@ -1,88 +0,0 @@
package org.apache.lucene.util.packed;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.util.Version;
import org.apache.lucene.util.packed.PackedInts.Mutable;
import org.apache.lucene.util.packed.PackedInts.Reader;
import org.elasticsearch.common.lucene.Lucene;
/**
* Simplistic compression for array of unsigned long values.
* Each value is >= 0 and <= a specified maximum value. The
* values are stored as packed ints, with each value
* consuming a fixed number of bits.
*
* @lucene.internal
*/
public class XPackedInts {

  static {
    // LUCENE MONITOR: this should be in Lucene 4.4 copied from Revision: 1492640.
    assert Lucene.VERSION == Version.LUCENE_43 : "Elasticsearch has upgraded to Lucene Version: [" + Lucene.VERSION + "] this class should be removed";
  }

  /**
   * Copies <code>len</code> values from <code>src</code>, starting at
   * <code>srcPos</code>, into <code>dest</code>, starting at
   * <code>destPos</code>, using the pre-allocated buffer <code>buf</code> as
   * scratch space.
   */
  static void copy(Reader src, int srcPos, Mutable dest, int destPos, int len, long[] buf) {
    assert buf.length > 0;
    // number of values sitting at the start of buf that were read but not yet written
    int pending = 0;
    while (len > 0) {
      final int room = buf.length - pending;
      final int read = src.get(srcPos, buf, pending, Math.min(len, room));
      assert read > 0;
      srcPos += read;
      len -= read;
      pending += read;

      final int written = dest.set(destPos, buf, 0, pending);
      assert written > 0;
      destPos += written;
      final int leftover = pending - written;
      if (leftover > 0) {
        // move what could not be written to the front of the buffer
        System.arraycopy(buf, written, buf, 0, leftover);
      }
      pending = leftover;
    }
    // everything has been read: flush whatever is still buffered
    while (pending > 0) {
      final int written = dest.set(destPos, buf, 0, pending);
      destPos += written;
      pending -= written;
      System.arraycopy(buf, written, buf, 0, pending);
    }
  }

  /** Check that the block size is a power of 2, in the right bounds, and return
   *  its log in base 2. */
  static int checkBlockSize(int blockSize, int minBlockSize, int maxBlockSize) {
    final boolean withinBounds = blockSize >= minBlockSize && blockSize <= maxBlockSize;
    if (!withinBounds) {
      throw new IllegalArgumentException("blockSize must be >= " + minBlockSize + " and <= " + maxBlockSize + ", got " + blockSize);
    }
    // clearing the lowest set bit leaves zero only when a single bit is set
    final int withoutLowestBit = blockSize & (blockSize - 1);
    if (withoutLowestBit != 0) {
      throw new IllegalArgumentException("blockSize must be a power of two, got " + blockSize);
    }
    return Integer.numberOfTrailingZeros(blockSize);
  }

  /** Return the number of blocks required to store <code>size</code> values on
   *  <code>blockSize</code>. */
  static int numBlocks(long size, int blockSize) {
    final int fullBlocks = (int) (size / blockSize);
    final int partialBlock = size % blockSize == 0 ? 0 : 1;
    final int numBlocks = fullBlocks + partialBlock;
    // detects a block count that no longer covers size
    final long capacity = (long) numBlocks * blockSize;
    if (capacity < size) {
      throw new IllegalArgumentException("size is too large for this block size");
    }
    return numBlocks;
  }
}

View File

@ -1,79 +0,0 @@
package org.apache.lucene.util.packed;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.util.Version;
import org.elasticsearch.common.lucene.Lucene;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.packed.PackedInts.Mutable;
/**
* A paged {@link XGrowableWriter}. This class slices data into fixed-size blocks
* which have independent numbers of bits per value and grow on-demand.
* <p>You should use this class instead of {@link AppendingLongBuffer} only when
* you need random write-access. Otherwise this class will likely be slower and
* less memory-efficient.
* @lucene.internal
*/
public final class XPagedGrowableWriter extends XAbstractPagedMutable<XPagedGrowableWriter> {

  static {
    // LUCENE MONITOR: this should be in Lucene 4.4 copied from Revision: 1492640.
    assert Lucene.VERSION == Version.LUCENE_43 : "Elasticsearch has upgraded to Lucene Version: [" + Lucene.VERSION + "] this class should be removed";
  }

  /** Overhead ratio handed to every page created by {@link #newMutable(int, int)}. */
  final float acceptableOverheadRatio;

  /**
   * Create a new {@link XPagedGrowableWriter} instance.
   *
   * @param size the number of values to store.
   * @param pageSize the number of values per page
   * @param startBitsPerValue the initial number of bits per value
   * @param acceptableOverheadRatio an acceptable overhead ratio
   */
  public XPagedGrowableWriter(long size, int pageSize,
                              int startBitsPerValue, float acceptableOverheadRatio) {
    this(size, pageSize, startBitsPerValue, acceptableOverheadRatio, true);
  }

  /**
   * Same as the public constructor, except that the pages are only allocated
   * when <code>fillPages</code> is <code>true</code>.
   */
  XPagedGrowableWriter(long size, int pageSize, int startBitsPerValue, float acceptableOverheadRatio, boolean fillPages) {
    super(startBitsPerValue, size, pageSize);
    // must be assigned before fillPages(): newMutable reads it for every page
    this.acceptableOverheadRatio = acceptableOverheadRatio;
    if (fillPages) {
      fillPages();
    }
  }

  /** Each page is an independent {@link XGrowableWriter}. */
  @Override
  protected Mutable newMutable(int valueCount, int bitsPerValue) {
    final Mutable page = new XGrowableWriter(bitsPerValue, valueCount, acceptableOverheadRatio);
    return page;
  }

  /** Creates a writer of the requested size whose pages are left unallocated. */
  @Override
  protected XPagedGrowableWriter newUnfilledCopy(long newSize) {
    final boolean allocatePages = false;
    return new XPagedGrowableWriter(newSize, pageSize(), bitsPerValue, acceptableOverheadRatio, allocatePages);
  }

  /** Adds the <code>float</code> field of this class to the inherited shallow size. */
  @Override
  protected long baseRamBytesUsed() {
    final long inherited = super.baseRamBytesUsed();
    return inherited + RamUsageEstimator.NUM_BYTES_FLOAT;
  }
}

View File

@ -128,7 +128,7 @@ public class Version implements Serializable {
public static final Version V_0_90_3 = new Version(V_0_90_3_ID, false, org.apache.lucene.util.Version.LUCENE_43);
public static final int V_1_0_0_Beta1_ID = /*00*/1000001;
public static final Version V_1_0_0_Beta1 = new Version(V_1_0_0_Beta1_ID, true, org.apache.lucene.util.Version.LUCENE_43);
public static final Version V_1_0_0_Beta1 = new Version(V_1_0_0_Beta1_ID, true, org.apache.lucene.util.Version.LUCENE_44);
public static final Version CURRENT = V_1_0_0_Beta1;

View File

@ -473,7 +473,7 @@ public class MetaDataCreateIndexService extends AbstractComponent {
}
}
CollectionUtil.quickSort(templates, new Comparator<IndexTemplateMetaData>() {
CollectionUtil.timSort(templates, new Comparator<IndexTemplateMetaData>() {
@Override
public int compare(IndexTemplateMetaData o1, IndexTemplateMetaData o2) {
return o2.order() - o1.order();

View File

@ -19,8 +19,11 @@
package org.elasticsearch.cluster.routing.allocation.allocator;
import org.apache.lucene.util.IntroSorter;
import org.apache.lucene.util.IntroSorter;
import com.google.common.base.Predicate;
import org.apache.lucene.util.SorterTemplate;
import org.elasticsearch.ElasticSearchIllegalArgumentException;
import org.elasticsearch.cluster.metadata.MetaData;
import org.elasticsearch.cluster.routing.MutableShardRouting;
@ -404,7 +407,7 @@ public class BalancedShardsAllocator extends AbstractComponent implements Shards
*/
weights[lowIdx] = sorter.weight(Operation.BALANCE, modelNodes[lowIdx]);
weights[highIdx] = sorter.weight(Operation.BALANCE, modelNodes[highIdx]);
sorter.quickSort(0, weights.length - 1);
sorter.sort(0, weights.length);
lowIdx = 0;
highIdx = weights.length - 1;
changed = true;
@ -451,7 +454,7 @@ public class BalancedShardsAllocator extends AbstractComponent implements Shards
sorter.reset(operation, indices[i]);
deltas[i] = sorter.delta();
}
new SorterTemplate() {
new IntroSorter() {
float pivotWeight;
@Override
@ -478,7 +481,7 @@ public class BalancedShardsAllocator extends AbstractComponent implements Shards
protected int comparePivot(int j) {
return Float.compare(deltas[j], pivotWeight);
}
}.quickSort(0, deltas.length - 1);
}.sort(0, deltas.length);
return indices;
}
@ -956,7 +959,7 @@ public class BalancedShardsAllocator extends AbstractComponent implements Shards
}
}
static final class NodeSorter extends SorterTemplate {
static final class NodeSorter extends IntroSorter {
final ModelNode[] modelNodes;
/* the nodes weights with respect to the current weight function / index */
@ -982,7 +985,7 @@ public class BalancedShardsAllocator extends AbstractComponent implements Shards
for (int i = 0; i < weights.length; i++) {
weights[i] = weight(operation, modelNodes[i]);
}
quickSort(0, modelNodes.length - 1);
sort(0, modelNodes.length);
}
public float weight(Operation operation, ModelNode node) {

View File

@ -35,14 +35,13 @@ import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.fielddata.IndexFieldData;
import java.io.IOException;
import java.lang.reflect.Field;
/**
*
*/
public class Lucene {
public static final Version VERSION = Version.LUCENE_43;
public static final Version VERSION = Version.LUCENE_44;
public static final Version ANALYZER_VERSION = VERSION;
public static final Version QUERYPARSER_VERSION = VERSION;
@ -57,6 +56,9 @@ public class Lucene {
if (version == null) {
return defaultVersion;
}
if ("4.4".equals(version)) {
return VERSION.LUCENE_44;
}
if ("4.3".equals(version)) {
return Version.LUCENE_43;
}

View File

@ -19,6 +19,8 @@
package org.elasticsearch.common.lucene.all;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DocsAndPositionsEnum;
@ -27,7 +29,6 @@ import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.*;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.search.similarities.Similarity.SloppySimScorer;
import org.apache.lucene.search.spans.SpanScorer;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.search.spans.SpanWeight;
@ -74,7 +75,7 @@ public class AllTermQuery extends SpanTermQuery {
if (this.stats == null) {
return null;
}
SloppySimScorer sloppySimScorer = similarity.sloppySimScorer(stats, context);
SimScorer sloppySimScorer = similarity.simScorer(stats, context);
return new AllTermSpanScorer((TermSpans) query.getSpans(context, acceptDocs, termContexts), this, sloppySimScorer);
}
@ -83,7 +84,7 @@ public class AllTermQuery extends SpanTermQuery {
protected float payloadScore;
protected int payloadsSeen;
public AllTermSpanScorer(TermSpans spans, Weight weight, Similarity.SloppySimScorer docScorer) throws IOException {
public AllTermSpanScorer(TermSpans spans, Weight weight, Similarity.SimScorer docScorer) throws IOException {
super(spans, weight, docScorer);
positions = spans.getPostings();
}
@ -158,7 +159,7 @@ public class AllTermQuery extends SpanTermQuery {
int newDoc = scorer.advance(doc);
if (newDoc == doc) {
float freq = scorer.freq();
SloppySimScorer docScorer = similarity.sloppySimScorer(stats, context);
SimScorer docScorer = similarity.simScorer(stats, context);
ComplexExplanation inner = new ComplexExplanation();
inner.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:");
Explanation scoreExplanation = docScorer.explain(doc, new Explanation(freq, "phraseFreq=" + freq));

View File

@ -45,7 +45,7 @@ public class DocIdSets {
* Is it an empty {@link DocIdSet}?
*/
public static boolean isEmpty(@Nullable DocIdSet set) {
return set == null || set == DocIdSet.EMPTY_DOCIDSET;
return set == null || set == EMPTY_DOCIDSET;
}
/**
@ -70,19 +70,19 @@ public class DocIdSets {
* <p/>
* Note, we don't use {@link org.apache.lucene.search.DocIdSet#isCacheable()} because execution
* might be expensive even if it's cacheable (i.e. not going back to the reader to execute). We effectively
* always either return {@link DocIdSet#EMPTY_DOCIDSET} or {@link FixedBitSet}.
* always either return an empty {@link DocIdSet} or {@link FixedBitSet} but never <code>null</code>.
*/
public static DocIdSet toCacheable(AtomicReader reader, @Nullable DocIdSet set) throws IOException {
if (set == null || set == DocIdSet.EMPTY_DOCIDSET) {
return DocIdSet.EMPTY_DOCIDSET;
if (set == null || set == EMPTY_DOCIDSET) {
return EMPTY_DOCIDSET;
}
DocIdSetIterator it = set.iterator();
if (it == null) {
return DocIdSet.EMPTY_DOCIDSET;
return EMPTY_DOCIDSET;
}
int doc = it.nextDoc();
if (doc == DocIdSetIterator.NO_MORE_DOCS) {
return DocIdSet.EMPTY_DOCIDSET;
return EMPTY_DOCIDSET;
}
if (set instanceof FixedBitSet) {
return set;
@ -94,6 +94,26 @@ public class DocIdSets {
} while (doc != DocIdSetIterator.NO_MORE_DOCS);
return fixedBitSet;
}
/**
 * An empty {@code DocIdSet} instance.
 * <p/>
 * Its iterator is whatever {@link DocIdSetIterator#empty()} returns, it reports itself as
 * cacheable, and it deliberately exposes no random-access {@link Bits} view
 * ({@link #bits()} returns <code>null</code>).
 */
protected static final DocIdSet EMPTY_DOCIDSET = new DocIdSet() {
/** Returns the iterator obtained from {@link DocIdSetIterator#empty()}. */
@Override
public DocIdSetIterator iterator() {
return DocIdSetIterator.empty();
}
/** Always <code>true</code>. */
@Override
public boolean isCacheable() {
return true;
}
// we explicitly provide no random access, as this filter is 100% sparse and iterator exits faster
@Override
public Bits bits() {
return null;
}
};
/**
* Gets a set to bits.

View File

@ -110,7 +110,7 @@ public class ElectMasterService extends AbstractComponent {
it.remove();
}
}
CollectionUtil.quickSort(possibleNodes, nodeComparator);
CollectionUtil.introSort(possibleNodes, nodeComparator);
return possibleNodes;
}

View File

@ -19,6 +19,8 @@
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.ngram.*;
import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter.Side;
@ -47,24 +49,24 @@ public class EdgeNGramTokenFilterFactory extends AbstractTokenFilterFactory {
super(index, indexSettings, name, settings);
this.minGram = settings.getAsInt("min_gram", NGramTokenFilter.DEFAULT_MIN_NGRAM_SIZE);
this.maxGram = settings.getAsInt("max_gram", NGramTokenFilter.DEFAULT_MAX_NGRAM_SIZE);
this.side = EdgeNGramTokenFilter.Side.getSide(settings.get("side", EdgeNGramTokenizer.DEFAULT_SIDE.getLabel()));
this.side = EdgeNGramTokenFilter.Side.getSide(settings.get("side", Lucene43EdgeNGramTokenizer.DEFAULT_SIDE.getLabel()));
}
@Override
public TokenStream create(TokenStream tokenStream) {
final Version version = this.version == Version.LUCENE_43 ? Version.LUCENE_44 : this.version; // we supported it since 4.3
if (version.onOrAfter(Version.LUCENE_43)) {
TokenStream result = tokenStream;
// side=BACK is not supported anymore but applying ReverseStringFilter up-front and after the token filter has the same effect
if (side == Side.BACK) {
result = new ReverseStringFilter(version, result);
}
result = new XEdgeNGramTokenFilter(version, result, minGram, maxGram);
result = new EdgeNGramTokenFilter(version, result, minGram, maxGram);
if (side == Side.BACK) {
result = new ReverseStringFilter(version, result);
}
return result;
} else {
return new EdgeNGramTokenFilter(tokenStream, side, minGram, maxGram);
}
return new EdgeNGramTokenFilter(version, tokenStream, side, minGram, maxGram);
}
}

View File

@ -19,12 +19,13 @@
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.ngram.Lucene43EdgeNGramTokenizer;
import org.elasticsearch.ElasticSearchIllegalArgumentException;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.ngram.EdgeNGramTokenizer;
import org.apache.lucene.analysis.ngram.NGramTokenizer;
import org.apache.lucene.analysis.ngram.XEdgeNGramTokenizer;
import org.apache.lucene.util.Version;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
@ -45,7 +46,7 @@ public class EdgeNGramTokenizerFactory extends AbstractTokenizerFactory {
private final int maxGram;
private final EdgeNGramTokenizer.Side side;
private final Lucene43EdgeNGramTokenizer.Side side;
private final CharMatcher matcher;
@ -54,22 +55,23 @@ public class EdgeNGramTokenizerFactory extends AbstractTokenizerFactory {
super(index, indexSettings, name, settings);
this.minGram = settings.getAsInt("min_gram", NGramTokenizer.DEFAULT_MIN_NGRAM_SIZE);
this.maxGram = settings.getAsInt("max_gram", NGramTokenizer.DEFAULT_MAX_NGRAM_SIZE);
this.side = EdgeNGramTokenizer.Side.getSide(settings.get("side", EdgeNGramTokenizer.DEFAULT_SIDE.getLabel()));
this.side = Lucene43EdgeNGramTokenizer.Side.getSide(settings.get("side", Lucene43EdgeNGramTokenizer.DEFAULT_SIDE.getLabel()));
this.matcher = parseTokenChars(settings.getAsArray("token_chars"));
}
@Override
public Tokenizer create(Reader reader) {
if (version.onOrAfter(Version.LUCENE_43)) {
if (side == EdgeNGramTokenizer.Side.BACK) {
final Version version = this.version == Version.LUCENE_43 ? Version.LUCENE_44 : this.version; // we supported it since 4.3
if (version.onOrAfter(Version.LUCENE_44)) {
if (side == Lucene43EdgeNGramTokenizer.Side.BACK) {
throw new ElasticSearchIllegalArgumentException("side=BACK is not supported anymore. Please fix your analysis chain or use"
+ " an older compatibility version (<=4.2) but beware that it might cause highlighting bugs.");
}
// LUCENE MONITOR: this token filter is a copy from lucene trunk and should go away once we upgrade to lucene 4.4
if (matcher == null) {
return new XEdgeNGramTokenizer(version, reader, minGram, maxGram);
return new EdgeNGramTokenizer(version, reader, minGram, maxGram);
} else {
return new XEdgeNGramTokenizer(version, reader, minGram, maxGram) {
return new EdgeNGramTokenizer(version, reader, minGram, maxGram) {
@Override
protected boolean isTokenChar(int chr) {
return matcher.isTokenChar(chr);
@ -77,7 +79,7 @@ public class EdgeNGramTokenizerFactory extends AbstractTokenizerFactory {
};
}
} else {
return new EdgeNGramTokenizer(reader, side, minGram, maxGram);
return new Lucene43EdgeNGramTokenizer(version, reader, side, minGram, maxGram);
}
}
}

View File

@ -17,6 +17,8 @@ package org.elasticsearch.index.analysis;
* specific language governing permissions and limitations
* under the License.
*/
import org.apache.lucene.util.Version;
import java.util.Arrays;
import java.util.Map;
@ -60,8 +62,8 @@ import org.elasticsearch.indices.analysis.IndicesAnalysisService;
*/
@AnalysisSettingsRequired
public class KeepWordFilterFactory extends AbstractTokenFilterFactory {
private Boolean enablePositionIncrements;
private CharArraySet keepWords;
private final CharArraySet keepWords;
private final boolean enablePositionIncrements;
private static final String KEEP_WORDS_KEY = "keep_words";
private static final String KEEP_WORDS_PATH_KEY = KEEP_WORDS_KEY + "_path";
private static final String KEEP_WORDS_CASE_KEY = KEEP_WORDS_KEY + "_case"; // for javadoc
@ -80,14 +82,22 @@ public class KeepWordFilterFactory extends AbstractTokenFilterFactory {
throw new ElasticSearchIllegalArgumentException("keep requires either `" + KEEP_WORDS_KEY + "` or `"
+ KEEP_WORDS_PATH_KEY + "` to be configured");
}
this.enablePositionIncrements = settings.getAsBoolean(ENABLE_POS_INC_KEY, true);
if (version.onOrAfter(Version.LUCENE_44) && settings.get(ENABLE_POS_INC_KEY) != null) {
throw new ElasticSearchIllegalArgumentException(ENABLE_POS_INC_KEY + " is not supported anymore. Please fix your analysis chain or use"
+ " an older compatibility version (<=4.3) but beware that it might cause highlighting bugs.");
}
enablePositionIncrements = version.onOrAfter(Version.LUCENE_44) ? true : settings.getAsBoolean(ENABLE_POS_INC_KEY, true);
this.keepWords = Analysis.getWordSet(env, settings, KEEP_WORDS_KEY, version);
}
@Override
public TokenStream create(TokenStream tokenStream) {
return new KeepWordFilter(enablePositionIncrements, tokenStream, keepWords);
if (version.onOrAfter(Version.LUCENE_44)) {
return new KeepWordFilter(version, tokenStream, keepWords);
}
return new KeepWordFilter(version, enablePositionIncrements, tokenStream, keepWords);
}

View File

@ -19,6 +19,10 @@
package org.elasticsearch.index.analysis;
import org.elasticsearch.ElasticSearchIllegalArgumentException;
import org.apache.lucene.util.Version;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.miscellaneous.LengthFilter;
import org.elasticsearch.common.inject.Inject;
@ -35,18 +39,25 @@ public class LengthTokenFilterFactory extends AbstractTokenFilterFactory {
private final int min;
private final int max;
private final boolean enablePositionIncrements;
private static final String ENABLE_POS_INC_KEY = "enable_position_increments";
@Inject
public LengthTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
super(index, indexSettings, name, settings);
min = settings.getAsInt("min", 0);
max = settings.getAsInt("max", Integer.MAX_VALUE);
enablePositionIncrements = settings.getAsBoolean("enabled_position_increments", true);
if (version.onOrAfter(Version.LUCENE_44) && settings.get(ENABLE_POS_INC_KEY) != null) {
throw new ElasticSearchIllegalArgumentException(ENABLE_POS_INC_KEY + " is not supported anymore. Please fix your analysis chain or use"
+ " an older compatibility version (<=4.3) but beware that it might cause highlighting bugs.");
}
enablePositionIncrements = version.onOrAfter(Version.LUCENE_44) ? true : settings.getAsBoolean(ENABLE_POS_INC_KEY, true);
}
@Override
public TokenStream create(TokenStream tokenStream) {
return new LengthFilter(enablePositionIncrements, tokenStream, min, max);
if (version.onOrAfter(Version.LUCENE_44)) {
return new LengthFilter(version, tokenStream, min, max);
}
return new LengthFilter(version, enablePositionIncrements, tokenStream, min, max);
}
}

View File

@ -21,7 +21,6 @@ package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.ngram.NGramTokenFilter;
import org.apache.lucene.analysis.ngram.XNGramTokenFilter;
import org.apache.lucene.util.Version;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
@ -49,10 +48,7 @@ public class NGramTokenFilterFactory extends AbstractTokenFilterFactory {
@Override
public TokenStream create(TokenStream tokenStream) {
if (this.version.onOrAfter(Version.LUCENE_43)) {
// LUCENE MONITOR: this token filter is a copy from lucene trunk and should go away once we upgrade to lucene 4.4
return new XNGramTokenFilter(version, tokenStream, minGram, maxGram);
}
return new NGramTokenFilter(tokenStream, minGram, maxGram);
final Version version = this.version == Version.LUCENE_43 ? Version.LUCENE_44 : this.version; // we supported it since 4.3
return new NGramTokenFilter(version, tokenStream, minGram, maxGram);
}
}

View File

@ -19,10 +19,11 @@
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.ngram.Lucene43NGramTokenizer;
import com.google.common.collect.ImmutableMap;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.ngram.NGramTokenizer;
import org.apache.lucene.analysis.ngram.XNGramTokenizer;
import org.apache.lucene.util.Version;
import org.elasticsearch.ElasticSearchIllegalArgumentException;
import org.elasticsearch.common.inject.Inject;
@ -98,20 +99,21 @@ public class NGramTokenizerFactory extends AbstractTokenizerFactory {
@Override
public Tokenizer create(Reader reader) {
if (this.version.onOrAfter(Version.LUCENE_43)) {
// LUCENE MONITOR: this token filter is a copy from lucene trunk and should go away once we upgrade to lucene 4.4
final Version version = this.version == Version.LUCENE_43 ? Version.LUCENE_44 : this.version; // we supported it since 4.3
if (version.onOrAfter(Version.LUCENE_44)) {
if (matcher == null) {
return new XNGramTokenizer(version, reader, minGram, maxGram);
return new NGramTokenizer(version, reader, minGram, maxGram);
} else {
return new XNGramTokenizer(version, reader, minGram, maxGram) {
return new NGramTokenizer(version, reader, minGram, maxGram) {
@Override
protected boolean isTokenChar(int chr) {
return matcher.isTokenChar(chr);
}
};
}
} else {
return new Lucene43NGramTokenizer(reader, minGram, maxGram);
}
return new NGramTokenizer(reader, minGram, maxGram);
}
}

View File

@ -19,6 +19,9 @@
package org.elasticsearch.index.analysis;
import org.apache.lucene.util.Version;
import org.elasticsearch.ElasticSearchIllegalArgumentException;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.miscellaneous.TrimFilter;
import org.elasticsearch.common.inject.Inject;
@ -34,15 +37,23 @@ import org.elasticsearch.index.settings.IndexSettings;
public class TrimTokenFilterFactory extends AbstractTokenFilterFactory {
private final boolean updateOffsets;
private static final String UPDATE_OFFSETS_KEY = "update_offsets";
/**
 * Creates the factory and reads the optional {@code update_offsets} setting.
 * <p/>
 * When the factory's {@code version} is on or after {@link Version#LUCENE_44}, explicitly
 * configuring {@code update_offsets} is rejected with an
 * {@link ElasticSearchIllegalArgumentException}. Otherwise the value is read and defaults
 * to <code>false</code>.
 * NOTE(review): {@code version} is not declared in this class; presumably it is inherited
 * from {@link AbstractTokenFilterFactory} - confirm.
 *
 * @throws ElasticSearchIllegalArgumentException if {@code update_offsets} is set while the
 *                                               version is on or after Lucene 4.4
 */
@Inject
public TrimTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) {
    super(index, indexSettings, name, settings);
    if (version.onOrAfter(Version.LUCENE_44) && settings.get(UPDATE_OFFSETS_KEY) != null) {
        throw new ElasticSearchIllegalArgumentException(UPDATE_OFFSETS_KEY + " is not supported anymore. Please fix your analysis chain or use"
                + " an older compatibility version (<=4.3) but beware that it might cause highlighting bugs.");
    }
    // Read through the same constant the guard above checks, so the two can never drift apart.
    this.updateOffsets = settings.getAsBoolean(UPDATE_OFFSETS_KEY, false);
}
@Override
public TokenStream create(TokenStream tokenStream) {
return new TrimFilter(tokenStream, updateOffsets);
if (version.onOrAfter(Version.LUCENE_44)) {
return new TrimFilter(version, tokenStream);
}
return new TrimFilter(version, tokenStream, updateOffsets);
}
}

View File

@ -186,7 +186,7 @@ public class WeightedFilterCache extends AbstractIndexComponent implements Filte
// note, we don't wrap the return value with a BitsFilteredDocIdSet.wrap(docIdSet, acceptDocs) because
// we rely on our custom XFilteredQuery to do the wrapping if needed, so we don't have to wrap each
// filter on its own
return cacheValue == DocIdSet.EMPTY_DOCIDSET ? null : cacheValue;
return DocIdSets.isEmpty(cacheValue) ? null : cacheValue;
}
public String toString() {

View File

@ -66,7 +66,10 @@ public class SnapshotDeletionPolicy extends AbstractESDeletionPolicy {
* Called by Lucene. Same as {@link #onCommit(java.util.List)}.
*/
public void onInit(List<? extends IndexCommit> commits) throws IOException {
onCommit(commits);
if (!commits.isEmpty()) { // this might be empty if we create a new index.
// the behavior has changed in Lucene 4.4 that calls onInit even with an empty commits list.
onCommit(commits);
}
}
/**
@ -74,6 +77,7 @@ public class SnapshotDeletionPolicy extends AbstractESDeletionPolicy {
* and delegates to the wrapped deletion policy.
*/
public void onCommit(List<? extends IndexCommit> commits) throws IOException {
assert !commits.isEmpty() : "Commits must not be empty";
synchronized (mutex) {
List<SnapshotIndexCommit> snapshotCommits = wrapCommits(commits);
primary.onCommit(snapshotCommits);
@ -94,7 +98,8 @@ public class SnapshotDeletionPolicy extends AbstractESDeletionPolicy {
}
this.commits = newCommits;
// the last commit that is not deleted
this.lastCommit = newCommits.get(newCommits.size() - 1);
this.lastCommit = newCommits.get(newCommits.size() - 1);
}
}

View File

@ -1273,7 +1273,6 @@ public class RobinEngine extends AbstractIndexShardComponent implements Engine {
config.setReaderTermsIndexDivisor(termIndexDivisor);
config.setMaxThreadStates(indexConcurrency);
config.setCodec(codecService.codec(codecName));
indexWriter = new IndexWriter(store.directory(), config);
} catch (IOException e) {
safeClose(indexWriter);

View File

@ -19,10 +19,6 @@
package org.elasticsearch.index.fielddata.ordinals;
import org.apache.lucene.util.Version;
import org.elasticsearch.common.lucene.Lucene;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.LongsRef;
import org.apache.lucene.util.packed.AppendingLongBuffer;
@ -34,11 +30,8 @@ import org.elasticsearch.index.fielddata.ordinals.Ordinals.Docs.Iter;
/** {@link Ordinals} implementation which is efficient at storing field data ordinals for multi-valued or sparse fields. */
public class MultiOrdinals implements Ordinals {
// hard-coded in Lucene 4.3 but will be exposed in Lucene 4.4
static {
assert Lucene.VERSION == Version.LUCENE_43;
}
private static final int OFFSETS_PAGE_SIZE = 1024;
private static final int OFFSET_INIT_PAGE_COUNT = 16;
/** Return true if this impl is going to be smaller than {@link SinglePackedOrdinals} by at least 20%. */
public static boolean significantlySmallerThanSinglePackedOrdinals(int maxDoc, int numDocsWithValue, long numOrds) {
@ -62,7 +55,7 @@ public class MultiOrdinals implements Ordinals {
multiValued = builder.getNumMultiValuesDocs() > 0;
numOrds = builder.getNumOrds();
endOffsets = new MonotonicAppendingLongBuffer();
ords = new AppendingLongBuffer();
ords = new AppendingLongBuffer(OFFSET_INIT_PAGE_COUNT, OFFSETS_PAGE_SIZE);
long lastEndOffset = 0;
for (int i = 0; i < builder.maxDoc(); ++i) {
final LongsRef docOrds = builder.docOrds(i);

View File

@ -25,7 +25,7 @@ import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.*;
import org.apache.lucene.util.packed.GrowableWriter;
import org.apache.lucene.util.packed.PackedInts;
import org.apache.lucene.util.packed.XPagedGrowableWriter;
import org.apache.lucene.util.packed.PagedGrowableWriter;
import org.elasticsearch.common.settings.Settings;
import java.io.Closeable;
@ -117,13 +117,13 @@ public final class OrdinalsBuilder implements Closeable {
}
// Current position
private XPagedGrowableWriter positions;
private PagedGrowableWriter positions;
// First level (0) of ordinals and pointers to the next level
private final GrowableWriter firstOrdinals;
private XPagedGrowableWriter firstNextLevelSlices;
private PagedGrowableWriter firstNextLevelSlices;
// Ordinals and pointers for other levels, starting at 1
private final XPagedGrowableWriter[] ordinals;
private final XPagedGrowableWriter[] nextLevelSlices;
private final PagedGrowableWriter[] ordinals;
private final PagedGrowableWriter[] nextLevelSlices;
private final int[] sizes;
private final int startBitsPerValue;
@ -132,11 +132,11 @@ public final class OrdinalsBuilder implements Closeable {
OrdinalsStore(int maxDoc, int startBitsPerValue, float acceptableOverheadRatio) {
this.startBitsPerValue = startBitsPerValue;
this.acceptableOverheadRatio = acceptableOverheadRatio;
positions = new XPagedGrowableWriter(maxDoc, PAGE_SIZE, startBitsPerValue, acceptableOverheadRatio);
positions = new PagedGrowableWriter(maxDoc, PAGE_SIZE, startBitsPerValue, acceptableOverheadRatio);
firstOrdinals = new GrowableWriter(startBitsPerValue, maxDoc, acceptableOverheadRatio);
// over allocate in order to never worry about the array sizes, 24 entries would allow to store several millions of ordinals per doc...
ordinals = new XPagedGrowableWriter[24];
nextLevelSlices = new XPagedGrowableWriter[24];
ordinals = new PagedGrowableWriter[24];
nextLevelSlices = new PagedGrowableWriter[24];
sizes = new int[24];
Arrays.fill(sizes, 1); // reserve the 1st slice on every level
}
@ -146,7 +146,7 @@ public final class OrdinalsBuilder implements Closeable {
final long newSlice = sizes[level]++;
// Lazily allocate ordinals
if (ordinals[level] == null) {
ordinals[level] = new XPagedGrowableWriter(8L * numSlots(level), PAGE_SIZE, startBitsPerValue, acceptableOverheadRatio);
ordinals[level] = new PagedGrowableWriter(8L * numSlots(level), PAGE_SIZE, startBitsPerValue, acceptableOverheadRatio);
} else {
ordinals[level] = ordinals[level].grow(sizes[level] * numSlots(level));
if (nextLevelSlices[level] != null) {
@ -167,7 +167,7 @@ public final class OrdinalsBuilder implements Closeable {
} else {
final long newSlice = newSlice(1);
if (firstNextLevelSlices == null) {
firstNextLevelSlices = new XPagedGrowableWriter(firstOrdinals.size(), PAGE_SIZE, 3, acceptableOverheadRatio);
firstNextLevelSlices = new PagedGrowableWriter(firstOrdinals.size(), PAGE_SIZE, 3, acceptableOverheadRatio);
}
firstNextLevelSlices.set(docID, newSlice);
final long offset = startOffset(1, newSlice);
@ -183,7 +183,7 @@ public final class OrdinalsBuilder implements Closeable {
// reached the end of the slice, allocate a new one on the next level
final long newSlice = newSlice(level + 1);
if (nextLevelSlices[level] == null) {
nextLevelSlices[level] = new XPagedGrowableWriter(sizes[level], PAGE_SIZE, 1, acceptableOverheadRatio);
nextLevelSlices[level] = new PagedGrowableWriter(sizes[level], PAGE_SIZE, 1, acceptableOverheadRatio);
}
nextLevelSlices[level].set(sliceID(level, offset), newSlice);
++level;

View File

@ -60,7 +60,7 @@ public class FSTBytesIndexFieldData extends AbstractBytesIndexFieldData<FSTBytes
if (terms == null) {
return FSTBytesAtomicFieldData.empty(reader.maxDoc());
}
PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
org.apache.lucene.util.fst.Builder<Long> fstBuilder = new org.apache.lucene.util.fst.Builder<Long>(INPUT_TYPE.BYTE1, outputs);
final IntsRef scratch = new IntsRef();

View File

@ -40,7 +40,7 @@ public class CommitPoints implements Iterable<CommitPoint> {
private final ImmutableList<CommitPoint> commitPoints;
public CommitPoints(List<CommitPoint> commitPoints) {
CollectionUtil.quickSort(commitPoints, new Comparator<CommitPoint>() {
CollectionUtil.introSort(commitPoints, new Comparator<CommitPoint>() {
@Override
public int compare(CommitPoint o1, CommitPoint o2) {
return (o2.version() < o1.version() ? -1 : (o2.version() == o1.version() ? 0 : 1));

View File

@ -27,13 +27,11 @@ public abstract class AbstractMergePolicyProvider<MP extends MergePolicy> extend
public static final String INDEX_COMPOUND_FORMAT = "index.compound_format";
protected volatile boolean compoundFormat;
protected volatile double noCFSRatio;
protected AbstractMergePolicyProvider(Store store) {
super(store.shardId(), store.indexSettings());
this.noCFSRatio = parseNoCFSRatio(indexSettings.get(INDEX_COMPOUND_FORMAT, Boolean.toString(store.suggestUseCompoundFile())));
this.compoundFormat = noCFSRatio != 0.0;
}
public static double parseNoCFSRatio(String noCFSRatio) {

View File

@ -83,7 +83,6 @@ public class LogByteSizeMergePolicyProvider extends AbstractMergePolicyProvider<
mergePolicy.setMergeFactor(mergeFactor);
mergePolicy.setMaxMergeDocs(maxMergeDocs);
mergePolicy.setCalibrateSizeByDeletes(calibrateSizeByDeletes);
mergePolicy.setUseCompoundFile(compoundFormat);
mergePolicy.setNoCFSRatio(noCFSRatio);
policies.add(mergePolicy);
@ -140,14 +139,11 @@ public class LogByteSizeMergePolicyProvider extends AbstractMergePolicyProvider<
}
final double noCFSRatio = parseNoCFSRatio(settings.get(INDEX_COMPOUND_FORMAT, Double.toString(LogByteSizeMergePolicyProvider.this.noCFSRatio)));
final boolean compoundFormat = noCFSRatio != 0.0;
if (noCFSRatio != LogByteSizeMergePolicyProvider.this.noCFSRatio) {
logger.info("updating index.compound_format from [{}] to [{}]", formatNoCFSRatio(LogByteSizeMergePolicyProvider.this.noCFSRatio), formatNoCFSRatio(noCFSRatio));
LogByteSizeMergePolicyProvider.this.compoundFormat = compoundFormat;
LogByteSizeMergePolicyProvider.this.noCFSRatio = noCFSRatio;
for (CustomLogByteSizeMergePolicy policy : policies) {
policy.setNoCFSRatio(noCFSRatio);
policy.setUseCompoundFile(compoundFormat);
}
}

View File

@ -86,7 +86,6 @@ public class LogDocMergePolicyProvider extends AbstractMergePolicyProvider<LogDo
mergePolicy.setMaxMergeDocs(maxMergeDocs);
mergePolicy.setMergeFactor(mergeFactor);
mergePolicy.setCalibrateSizeByDeletes(calibrateSizeByDeletes);
mergePolicy.setUseCompoundFile(compoundFormat);
mergePolicy.setNoCFSRatio(noCFSRatio);
policies.add(mergePolicy);
return mergePolicy;
@ -130,11 +129,9 @@ public class LogDocMergePolicyProvider extends AbstractMergePolicyProvider<LogDo
final boolean compoundFormat = noCFSRatio != 0.0;
if (noCFSRatio != LogDocMergePolicyProvider.this.noCFSRatio) {
logger.info("updating index.compound_format from [{}] to [{}]", formatNoCFSRatio(LogDocMergePolicyProvider.this.noCFSRatio), formatNoCFSRatio(noCFSRatio));
LogDocMergePolicyProvider.this.compoundFormat = compoundFormat;
LogDocMergePolicyProvider.this.noCFSRatio = noCFSRatio;
for (CustomLogDocMergePolicy policy : policies) {
policy.setNoCFSRatio(noCFSRatio);
policy.setUseCompoundFile(compoundFormat);
}
}
}

View File

@ -99,7 +99,6 @@ public class TieredMergePolicyProvider extends AbstractMergePolicyProvider<Tiere
} else {
mergePolicy = new CustomTieredMergePolicyProvider(this);
}
mergePolicy.setUseCompoundFile(compoundFormat);
mergePolicy.setNoCFSRatio(noCFSRatio);
mergePolicy.setForceMergeDeletesPctAllowed(forceMergeDeletesPctAllowed);
mergePolicy.setFloorSegmentMB(floorSegment.mbFrac());
@ -191,14 +190,11 @@ public class TieredMergePolicyProvider extends AbstractMergePolicyProvider<Tiere
}
final double noCFSRatio = parseNoCFSRatio(settings.get(INDEX_COMPOUND_FORMAT, Double.toString(TieredMergePolicyProvider.this.noCFSRatio)));
final boolean compoundFormat = noCFSRatio != 0.0;
if (noCFSRatio != TieredMergePolicyProvider.this.noCFSRatio) {
logger.info("updating index.compound_format from [{}] to [{}]", formatNoCFSRatio(TieredMergePolicyProvider.this.noCFSRatio), formatNoCFSRatio(noCFSRatio));
TieredMergePolicyProvider.this.compoundFormat = compoundFormat;
TieredMergePolicyProvider.this.noCFSRatio = noCFSRatio;
for (CustomTieredMergePolicyProvider policy : policies) {
policy.setNoCFSRatio(noCFSRatio);
policy.setUseCompoundFile(compoundFormat);
}
}

View File

@ -59,8 +59,7 @@ public class ConcurrentMergeSchedulerProvider extends MergeSchedulerProvider {
@Override
public MergeScheduler newMergeScheduler() {
CustomConcurrentMergeScheduler concurrentMergeScheduler = new CustomConcurrentMergeScheduler(logger, shardId, this);
concurrentMergeScheduler.setMaxMergeCount(maxMergeCount);
concurrentMergeScheduler.setMaxThreadCount(maxThreadCount);
concurrentMergeScheduler.setMaxMergesAndThreads(maxMergeCount, maxThreadCount);
schedulers.add(concurrentMergeScheduler);
return concurrentMergeScheduler;
}

View File

@ -30,6 +30,7 @@ import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.ToStringUtils;
import org.elasticsearch.ElasticSearchIllegalStateException;
import org.elasticsearch.common.bytes.HashedBytesArray;
import org.elasticsearch.common.lucene.docset.DocIdSets;
import org.elasticsearch.common.lucene.search.ApplyAcceptedDocsFilter;
import org.elasticsearch.common.lucene.search.Queries;
import org.elasticsearch.common.lucene.search.TermFilter;
@ -217,7 +218,7 @@ public class ChildrenQuery extends Query implements SearchContext.Rewrite {
@Override
public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder, boolean topScorer, Bits acceptDocs) throws IOException {
DocIdSet parentsSet = parentFilter.getDocIdSet(context, acceptDocs);
if (parentsSet == null || parentsSet == DocIdSet.EMPTY_DOCIDSET || remaining == 0) {
if (DocIdSets.isEmpty(parentsSet) || remaining == 0) {
return null;
}

View File

@ -19,6 +19,8 @@
package org.elasticsearch.index.search.child;
import org.elasticsearch.common.lucene.docset.DocIdSets;
import gnu.trove.map.hash.TObjectFloatHashMap;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
@ -212,7 +214,7 @@ public class ParentQuery extends Query implements SearchContext.Rewrite {
@Override
public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder, boolean topScorer, Bits acceptDocs) throws IOException {
DocIdSet childrenDocSet = childrenFilter.getDocIdSet(context, acceptDocs);
if (childrenDocSet == null || childrenDocSet == DocIdSet.EMPTY_DOCIDSET) {
if (DocIdSets.isEmpty(childrenDocSet)) {
return null;
}
IdReaderTypeCache idTypeCache = searchContext.idCache().reader(context.reader()).type(parentType);

View File

@ -82,7 +82,6 @@ import org.apache.lucene.analysis.th.ThaiAnalyzer;
import org.apache.lucene.analysis.tr.TurkishAnalyzer;
import org.apache.lucene.analysis.util.ElisionFilter;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.ElasticSearchIllegalStateException;
import org.elasticsearch.common.component.AbstractComponent;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.lucene.Lucene;
@ -91,7 +90,6 @@ import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.util.concurrent.ConcurrentCollections;
import org.elasticsearch.index.analysis.*;
import java.io.IOException;
import java.io.Reader;
import java.util.Map;
@ -268,7 +266,7 @@ public class IndicesAnalysisService extends AbstractComponent {
@Override
public Tokenizer create(Reader reader) {
return new NGramTokenizer(reader);
return new NGramTokenizer(Lucene.ANALYZER_VERSION, reader);
}
}));
@ -280,7 +278,7 @@ public class IndicesAnalysisService extends AbstractComponent {
@Override
public Tokenizer create(Reader reader) {
return new NGramTokenizer(reader);
return new NGramTokenizer(Lucene.ANALYZER_VERSION, reader);
}
}));
@ -292,7 +290,7 @@ public class IndicesAnalysisService extends AbstractComponent {
@Override
public Tokenizer create(Reader reader) {
return new EdgeNGramTokenizer(reader, EdgeNGramTokenizer.DEFAULT_SIDE, EdgeNGramTokenizer.DEFAULT_MIN_GRAM_SIZE, EdgeNGramTokenizer.DEFAULT_MAX_GRAM_SIZE);
return new EdgeNGramTokenizer(Lucene.ANALYZER_VERSION, reader, EdgeNGramTokenizer.DEFAULT_MIN_GRAM_SIZE, EdgeNGramTokenizer.DEFAULT_MAX_GRAM_SIZE);
}
}));
@ -304,7 +302,7 @@ public class IndicesAnalysisService extends AbstractComponent {
@Override
public Tokenizer create(Reader reader) {
return new EdgeNGramTokenizer(reader, EdgeNGramTokenizer.DEFAULT_SIDE, EdgeNGramTokenizer.DEFAULT_MIN_GRAM_SIZE, EdgeNGramTokenizer.DEFAULT_MAX_GRAM_SIZE);
return new EdgeNGramTokenizer(Lucene.ANALYZER_VERSION, reader, EdgeNGramTokenizer.DEFAULT_MIN_GRAM_SIZE, EdgeNGramTokenizer.DEFAULT_MAX_GRAM_SIZE);
}
}));
@ -357,7 +355,7 @@ public class IndicesAnalysisService extends AbstractComponent {
@Override
public TokenStream create(TokenStream tokenStream) {
return new TrimFilter(tokenStream, false);
return new TrimFilter(Lucene.ANALYZER_VERSION, tokenStream);
}
}));
@ -393,7 +391,7 @@ public class IndicesAnalysisService extends AbstractComponent {
@Override
public TokenStream create(TokenStream tokenStream) {
return new LengthFilter(true, tokenStream, 0, Integer.MAX_VALUE);
return new LengthFilter(Lucene.ANALYZER_VERSION, tokenStream, 0, Integer.MAX_VALUE);
}
}));
@ -477,7 +475,7 @@ public class IndicesAnalysisService extends AbstractComponent {
@Override
public TokenStream create(TokenStream tokenStream) {
return new NGramTokenFilter(tokenStream);
return new NGramTokenFilter(Lucene.ANALYZER_VERSION, tokenStream);
}
}));
@ -489,7 +487,7 @@ public class IndicesAnalysisService extends AbstractComponent {
@Override
public TokenStream create(TokenStream tokenStream) {
return new NGramTokenFilter(tokenStream);
return new NGramTokenFilter(Lucene.ANALYZER_VERSION, tokenStream);
}
}));
@ -501,7 +499,7 @@ public class IndicesAnalysisService extends AbstractComponent {
@Override
public TokenStream create(TokenStream tokenStream) {
return new EdgeNGramTokenFilter(tokenStream, EdgeNGramTokenFilter.DEFAULT_SIDE, EdgeNGramTokenFilter.DEFAULT_MIN_GRAM_SIZE, EdgeNGramTokenFilter.DEFAULT_MAX_GRAM_SIZE);
return new EdgeNGramTokenFilter(Lucene.ANALYZER_VERSION, tokenStream, EdgeNGramTokenFilter.DEFAULT_MIN_GRAM_SIZE, EdgeNGramTokenFilter.DEFAULT_MAX_GRAM_SIZE);
}
}));
@ -513,7 +511,7 @@ public class IndicesAnalysisService extends AbstractComponent {
@Override
public TokenStream create(TokenStream tokenStream) {
return new EdgeNGramTokenFilter(tokenStream, EdgeNGramTokenFilter.DEFAULT_SIDE, EdgeNGramTokenFilter.DEFAULT_MIN_GRAM_SIZE, EdgeNGramTokenFilter.DEFAULT_MAX_GRAM_SIZE);
return new EdgeNGramTokenFilter(Lucene.ANALYZER_VERSION, tokenStream, EdgeNGramTokenFilter.DEFAULT_MIN_GRAM_SIZE, EdgeNGramTokenFilter.DEFAULT_MAX_GRAM_SIZE);
}
}));

View File

@ -131,7 +131,7 @@ public class HotThreads {
// sort by delta CPU time on thread.
List<MyThreadInfo> hotties = new ArrayList<MyThreadInfo>(threadInfos.values());
// skip that for now
CollectionUtil.quickSort(hotties, new Comparator<MyThreadInfo>() {
CollectionUtil.introSort(hotties, new Comparator<MyThreadInfo>() {
public int compare(MyThreadInfo o1, MyThreadInfo o2) {
if ("cpu".equals(type)) {
return (int) (o2.cpuTime - o1.cpuTime);

View File

@ -142,7 +142,7 @@ public class PlainHighlighter implements Highlighter {
throw new FetchPhaseExecutionException(context, "Failed to highlight field [" + highlighterContext.fieldName + "]", e);
}
if (field.scoreOrdered()) {
CollectionUtil.quickSort(fragsList, new Comparator<TextFragment>() {
CollectionUtil.introSort(fragsList, new Comparator<TextFragment>() {
/**
 * Orders text fragments by descending score, so the highest-scoring
 * fragment sorts first.
 * <p>
 * The scores are compared with {@link Float#compare(float, float)} rather
 * than by rounding their difference: rounding yields 0 for any difference
 * in [-0.5, 0.5), which treats closely scored fragments as ties and makes
 * the comparator non-transitive.
 *
 * @param o1 first fragment
 * @param o2 second fragment
 * @return a negative value if {@code o1} scores higher than {@code o2},
 *         a positive value if it scores lower, and 0 if the scores are equal
 */
@Override
public int compare(TextFragment o1, TextFragment o2) {
    // Arguments are swapped on purpose to obtain descending order.
    return Float.compare(o2.getScore(), o1.getScore());
}

View File

@ -57,7 +57,7 @@ public final class FragmentBuilderHelper {
* the FastVectorHighlighter. Yet, this is really a lucene problem and should be fixed in lucene rather
than in this hack... aka. "we are working on it!" */
final List<SubInfo> subInfos = fragInfo.getSubInfos();
CollectionUtil.quickSort(subInfos, new Comparator<SubInfo>() {
CollectionUtil.introSort(subInfos, new Comparator<SubInfo>() {
@Override
public int compare(SubInfo o1, SubInfo o2) {
int startOffset = o1.getTermsOffsets().get(0).getStartOffset();

View File

@ -17,22 +17,11 @@ package org.elasticsearch.search.rescore;
* specific language governing permissions and limitations
* under the License.
*/
import java.io.IOException;
import java.util.Arrays;
import java.util.Set;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.ComplexExplanation;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.*;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.SorterTemplate;
import org.apache.lucene.util.IntroSorter;
import org.elasticsearch.ElasticSearchIllegalArgumentException;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.common.xcontent.XContentParser.Token;
@ -40,6 +29,10 @@ import org.elasticsearch.index.query.ParsedQuery;
import org.elasticsearch.search.internal.ContextIndexSearcher;
import org.elasticsearch.search.internal.SearchContext;
import java.io.IOException;
import java.util.Arrays;
import java.util.Set;
final class QueryRescorer implements Rescorer {
public static final Rescorer INSTANCE = new QueryRescorer();
@ -163,10 +156,10 @@ final class QueryRescorer implements Rescorer {
private TopDocs merge(TopDocs primary, TopDocs secondary, QueryRescoreContext context) {
DocIdSorter sorter = new DocIdSorter();
sorter.array = primary.scoreDocs;
sorter.mergeSort(0, sorter.array.length-1);
sorter.sort(0, sorter.array.length);
ScoreDoc[] primaryDocs = sorter.array;
sorter.array = secondary.scoreDocs;
sorter.mergeSort(0, sorter.array.length-1);
sorter.sort(0, sorter.array.length);
ScoreDoc[] secondaryDocs = sorter.array;
int j = 0;
float primaryWeight = context.queryWeight();
@ -180,12 +173,12 @@ final class QueryRescorer implements Rescorer {
}
ScoreSorter scoreSorter = new ScoreSorter();
scoreSorter.array = primaryDocs;
scoreSorter.mergeSort(0, primaryDocs.length-1);
scoreSorter.sort(0, primaryDocs.length);
primary.setMaxScore(primaryDocs[0].score);
return primary;
}
private static final class DocIdSorter extends SorterTemplate {
private static final class DocIdSorter extends IntroSorter {
private ScoreDoc[] array;
private ScoreDoc pivot;
@Override
@ -222,7 +215,7 @@ final class QueryRescorer implements Rescorer {
return -1;
}
private static final class ScoreSorter extends SorterTemplate {
private static final class ScoreSorter extends IntroSorter {
private ScoreDoc[] array;
private ScoreDoc pivot;
@Override

View File

@ -5,15 +5,15 @@
"my_keep_filter":{
"type":"keep",
"keep_words" : ["Hello", "worlD"],
"enable_position_increments" : true,
"keep_words_case" : true
},
"my_case_sensitive_keep_filter":{
"type":"keep",
"keep_words" : ["Hello", "worlD"],
"enable_position_increments" : false
"enable_position_increments" : false,
"version" : "4.2"
}
}
}
}
}
}

View File

@ -50,73 +50,40 @@ public class MergePolicySettingsTest {
public void testCompoundFileSettings() throws IOException {
IndexSettingsService service = new IndexSettingsService(new Index("test"), EMPTY_SETTINGS);
assertThat(new TieredMergePolicyProvider(createStore(EMPTY_SETTINGS), service).newMergePolicy().getUseCompoundFile(), equalTo(false));
assertThat(new TieredMergePolicyProvider(createStore(EMPTY_SETTINGS), service).newMergePolicy().getNoCFSRatio(), equalTo(0.0));
assertThat(new TieredMergePolicyProvider(createStore(build(true)), service).newMergePolicy().getUseCompoundFile(), equalTo(true));
assertThat(new TieredMergePolicyProvider(createStore(build(true)), service).newMergePolicy().getNoCFSRatio(), equalTo(1.0));
assertThat(new TieredMergePolicyProvider(createStore(build(0.5)), service).newMergePolicy().getUseCompoundFile(), equalTo(true));
assertThat(new TieredMergePolicyProvider(createStore(build(0.5)), service).newMergePolicy().getNoCFSRatio(), equalTo(0.5));
assertThat(new TieredMergePolicyProvider(createStore(build(1.0)), service).newMergePolicy().getUseCompoundFile(), equalTo(true));
assertThat(new TieredMergePolicyProvider(createStore(build(1.0)), service).newMergePolicy().getNoCFSRatio(), equalTo(1.0));
assertThat(new TieredMergePolicyProvider(createStore(build("true")), service).newMergePolicy().getUseCompoundFile(), equalTo(true));
assertThat(new TieredMergePolicyProvider(createStore(build("true")), service).newMergePolicy().getNoCFSRatio(), equalTo(1.0));
assertThat(new TieredMergePolicyProvider(createStore(build("True")), service).newMergePolicy().getUseCompoundFile(), equalTo(true));
assertThat(new TieredMergePolicyProvider(createStore(build("True")), service).newMergePolicy().getNoCFSRatio(), equalTo(1.0));
assertThat(new TieredMergePolicyProvider(createStore(build("False")), service).newMergePolicy().getUseCompoundFile(), equalTo(false));
assertThat(new TieredMergePolicyProvider(createStore(build("False")), service).newMergePolicy().getNoCFSRatio(), equalTo(0.0));
assertThat(new TieredMergePolicyProvider(createStore(build("false")), service).newMergePolicy().getUseCompoundFile(), equalTo(false));
assertThat(new TieredMergePolicyProvider(createStore(build("false")), service).newMergePolicy().getNoCFSRatio(), equalTo(0.0));
assertThat(new TieredMergePolicyProvider(createStore(build(false)), service).newMergePolicy().getUseCompoundFile(), equalTo(false));
assertThat(new TieredMergePolicyProvider(createStore(build(false)), service).newMergePolicy().getNoCFSRatio(), equalTo(0.0));
assertThat(new TieredMergePolicyProvider(createStore(build(0)), service).newMergePolicy().getUseCompoundFile(), equalTo(false));
assertThat(new TieredMergePolicyProvider(createStore(build(0)), service).newMergePolicy().getNoCFSRatio(), equalTo(0.0));
assertThat(new TieredMergePolicyProvider(createStore(build(0.0)), service).newMergePolicy().getUseCompoundFile(), equalTo(false));
assertThat(new TieredMergePolicyProvider(createStore(build(0.0)), service).newMergePolicy().getNoCFSRatio(), equalTo(0.0));
assertThat(new LogByteSizeMergePolicyProvider(createStore(EMPTY_SETTINGS), service).newMergePolicy().getUseCompoundFile(), equalTo(false));
assertThat(new LogByteSizeMergePolicyProvider(createStore(EMPTY_SETTINGS), service).newMergePolicy().getNoCFSRatio(), equalTo(0.0));
assertThat(new LogByteSizeMergePolicyProvider(createStore(build(true)), service).newMergePolicy().getUseCompoundFile(), equalTo(true));
assertThat(new LogByteSizeMergePolicyProvider(createStore(build(true)), service).newMergePolicy().getNoCFSRatio(), equalTo(1.0));
assertThat(new LogByteSizeMergePolicyProvider(createStore(build(0.5)), service).newMergePolicy().getUseCompoundFile(), equalTo(true));
assertThat(new LogByteSizeMergePolicyProvider(createStore(build(0.5)), service).newMergePolicy().getNoCFSRatio(), equalTo(0.5));
assertThat(new LogByteSizeMergePolicyProvider(createStore(build(1.0)), service).newMergePolicy().getUseCompoundFile(), equalTo(true));
assertThat(new LogByteSizeMergePolicyProvider(createStore(build(1.0)), service).newMergePolicy().getNoCFSRatio(), equalTo(1.0));
assertThat(new LogByteSizeMergePolicyProvider(createStore(build("true")), service).newMergePolicy().getUseCompoundFile(), equalTo(true));
assertThat(new LogByteSizeMergePolicyProvider(createStore(build("true")), service).newMergePolicy().getNoCFSRatio(), equalTo(1.0));
assertThat(new LogByteSizeMergePolicyProvider(createStore(build("True")), service).newMergePolicy().getUseCompoundFile(), equalTo(true));
assertThat(new LogByteSizeMergePolicyProvider(createStore(build("True")), service).newMergePolicy().getNoCFSRatio(), equalTo(1.0));
assertThat(new LogByteSizeMergePolicyProvider(createStore(build("False")), service).newMergePolicy().getUseCompoundFile(), equalTo(false));
assertThat(new LogByteSizeMergePolicyProvider(createStore(build("False")), service).newMergePolicy().getNoCFSRatio(), equalTo(0.0));
assertThat(new LogByteSizeMergePolicyProvider(createStore(build("false")), service).newMergePolicy().getUseCompoundFile(), equalTo(false));
assertThat(new LogByteSizeMergePolicyProvider(createStore(build("false")), service).newMergePolicy().getNoCFSRatio(), equalTo(0.0));
assertThat(new LogByteSizeMergePolicyProvider(createStore(build(false)), service).newMergePolicy().getUseCompoundFile(), equalTo(false));
assertThat(new LogByteSizeMergePolicyProvider(createStore(build(false)), service).newMergePolicy().getNoCFSRatio(), equalTo(0.0));
assertThat(new LogByteSizeMergePolicyProvider(createStore(build(0)), service).newMergePolicy().getUseCompoundFile(), equalTo(false));
assertThat(new LogByteSizeMergePolicyProvider(createStore(build(0)), service).newMergePolicy().getNoCFSRatio(), equalTo(0.0));
assertThat(new LogByteSizeMergePolicyProvider(createStore(build(0.0)), service).newMergePolicy().getUseCompoundFile(), equalTo(false));
assertThat(new LogByteSizeMergePolicyProvider(createStore(build(0.0)), service).newMergePolicy().getNoCFSRatio(), equalTo(0.0));
assertThat(new LogDocMergePolicyProvider(createStore(EMPTY_SETTINGS), service).newMergePolicy().getUseCompoundFile(), equalTo(false));
assertThat(new LogDocMergePolicyProvider(createStore(EMPTY_SETTINGS), service).newMergePolicy().getNoCFSRatio(), equalTo(0.0));
assertThat(new LogDocMergePolicyProvider(createStore(build(true)), service).newMergePolicy().getUseCompoundFile(), equalTo(true));
assertThat(new LogDocMergePolicyProvider(createStore(build(true)), service).newMergePolicy().getNoCFSRatio(), equalTo(1.0));
assertThat(new LogDocMergePolicyProvider(createStore(build(0.5)), service).newMergePolicy().getUseCompoundFile(), equalTo(true));
assertThat(new LogDocMergePolicyProvider(createStore(build(0.5)), service).newMergePolicy().getNoCFSRatio(), equalTo(0.5));
assertThat(new LogDocMergePolicyProvider(createStore(build(1.0)), service).newMergePolicy().getUseCompoundFile(), equalTo(true));
assertThat(new LogDocMergePolicyProvider(createStore(build(1.0)), service).newMergePolicy().getNoCFSRatio(), equalTo(1.0));
assertThat(new LogDocMergePolicyProvider(createStore(build("true")), service).newMergePolicy().getUseCompoundFile(), equalTo(true));
assertThat(new LogDocMergePolicyProvider(createStore(build("true")), service).newMergePolicy().getNoCFSRatio(), equalTo(1.0));
assertThat(new LogDocMergePolicyProvider(createStore(build("True")), service).newMergePolicy().getUseCompoundFile(), equalTo(true));
assertThat(new LogDocMergePolicyProvider(createStore(build("True")), service).newMergePolicy().getNoCFSRatio(), equalTo(1.0));
assertThat(new LogDocMergePolicyProvider(createStore(build("False")), service).newMergePolicy().getUseCompoundFile(), equalTo(false));
assertThat(new LogDocMergePolicyProvider(createStore(build("False")), service).newMergePolicy().getNoCFSRatio(), equalTo(0.0));
assertThat(new LogDocMergePolicyProvider(createStore(build("false")), service).newMergePolicy().getUseCompoundFile(), equalTo(false));
assertThat(new LogDocMergePolicyProvider(createStore(build("false")), service).newMergePolicy().getNoCFSRatio(), equalTo(0.0));
assertThat(new LogDocMergePolicyProvider(createStore(build(false)), service).newMergePolicy().getUseCompoundFile(), equalTo(false));
assertThat(new LogDocMergePolicyProvider(createStore(build(false)), service).newMergePolicy().getNoCFSRatio(), equalTo(0.0));
assertThat(new LogDocMergePolicyProvider(createStore(build(0)), service).newMergePolicy().getUseCompoundFile(), equalTo(false));
assertThat(new LogDocMergePolicyProvider(createStore(build(0)), service).newMergePolicy().getNoCFSRatio(), equalTo(0.0));
assertThat(new LogDocMergePolicyProvider(createStore(build(0.0)), service).newMergePolicy().getUseCompoundFile(), equalTo(false));
assertThat(new LogDocMergePolicyProvider(createStore(build(0.0)), service).newMergePolicy().getNoCFSRatio(), equalTo(0.0));
}
@ -150,57 +117,45 @@ public class MergePolicySettingsTest {
{
IndexSettingsService service = new IndexSettingsService(new Index("test"), EMPTY_SETTINGS);
TieredMergePolicyProvider mp = new TieredMergePolicyProvider(createStore(EMPTY_SETTINGS), service);
assertThat(mp.newMergePolicy().getUseCompoundFile(), equalTo(false));
assertThat(mp.newMergePolicy().getNoCFSRatio(), equalTo(0.0));
service.refreshSettings(build(1.0));
assertThat(mp.newMergePolicy().getUseCompoundFile(), equalTo(true));
assertThat(mp.newMergePolicy().getNoCFSRatio(), equalTo(1.0));
service.refreshSettings(build(0.1));
assertThat(mp.newMergePolicy().getUseCompoundFile(), equalTo(true));
assertThat(mp.newMergePolicy().getNoCFSRatio(), equalTo(0.1));
service.refreshSettings(build(0.0));
assertThat(mp.newMergePolicy().getUseCompoundFile(), equalTo(false));
assertThat(mp.newMergePolicy().getNoCFSRatio(), equalTo(0.0));
}
{
IndexSettingsService service = new IndexSettingsService(new Index("test"), EMPTY_SETTINGS);
LogByteSizeMergePolicyProvider mp = new LogByteSizeMergePolicyProvider(createStore(EMPTY_SETTINGS), service);
assertThat(mp.newMergePolicy().getUseCompoundFile(), equalTo(false));
assertThat(mp.newMergePolicy().getNoCFSRatio(), equalTo(0.0));
service.refreshSettings(build(1.0));
assertThat(mp.newMergePolicy().getUseCompoundFile(), equalTo(true));
assertThat(mp.newMergePolicy().getNoCFSRatio(), equalTo(1.0));
service.refreshSettings(build(0.1));
assertThat(mp.newMergePolicy().getUseCompoundFile(), equalTo(true));
assertThat(mp.newMergePolicy().getNoCFSRatio(), equalTo(0.1));
service.refreshSettings(build(0.0));
assertThat(mp.newMergePolicy().getUseCompoundFile(), equalTo(false));
assertThat(mp.newMergePolicy().getNoCFSRatio(), equalTo(0.0));
}
{
IndexSettingsService service = new IndexSettingsService(new Index("test"), EMPTY_SETTINGS);
LogDocMergePolicyProvider mp = new LogDocMergePolicyProvider(createStore(EMPTY_SETTINGS), service);
assertThat(mp.newMergePolicy().getUseCompoundFile(), equalTo(false));
assertThat(mp.newMergePolicy().getNoCFSRatio(), equalTo(0.0));
service.refreshSettings(build(1.0));
assertThat(mp.newMergePolicy().getUseCompoundFile(), equalTo(true));
assertThat(mp.newMergePolicy().getNoCFSRatio(), equalTo(1.0));
service.refreshSettings(build(0.1));
assertThat(mp.newMergePolicy().getUseCompoundFile(), equalTo(true));
assertThat(mp.newMergePolicy().getNoCFSRatio(), equalTo(0.1));
service.refreshSettings(build(0.0));
assertThat(mp.newMergePolicy().getUseCompoundFile(), equalTo(false));
assertThat(mp.newMergePolicy().getNoCFSRatio(), equalTo(0.0));
}
}