Upgrade to Lucene 4.4
This commit is contained in:
parent
92a7030558
commit
2e9851138e
2
pom.xml
2
pom.xml
|
@ -30,7 +30,7 @@
|
||||||
</parent>
|
</parent>
|
||||||
|
|
||||||
<properties>
|
<properties>
|
||||||
<lucene.version>4.3.1</lucene.version>
|
<lucene.version>4.4.0</lucene.version>
|
||||||
</properties>
|
</properties>
|
||||||
|
|
||||||
<repositories>
|
<repositories>
|
||||||
|
|
|
@ -1,214 +0,0 @@
|
||||||
package org.apache.lucene.analysis.ngram;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
|
||||||
* this work for additional information regarding copyright ownership.
|
|
||||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
|
||||||
* (the "License"); you may not use this file except in compliance with
|
|
||||||
* the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
import org.elasticsearch.common.lucene.Lucene;
|
|
||||||
|
|
||||||
import org.apache.lucene.analysis.TokenFilter;
|
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
|
||||||
import org.apache.lucene.analysis.reverse.ReverseStringFilter;
|
|
||||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
|
||||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
|
||||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
|
||||||
import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
|
|
||||||
import org.apache.lucene.analysis.util.XCharacterUtils;
|
|
||||||
import org.apache.lucene.util.Version;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Tokenizes the given token into n-grams of given size(s).
|
|
||||||
* <p>
|
|
||||||
* This {@link TokenFilter} create n-grams from the beginning edge or ending edge of a input token.
|
|
||||||
* <p><a name="version"/>As of Lucene 4.4, this filter does not support
|
|
||||||
* {@link Side#BACK} (you can use {@link ReverseStringFilter} up-front and
|
|
||||||
* afterward to get the same behavior), handles supplementary characters
|
|
||||||
* correctly and does not update offsets anymore.
|
|
||||||
*/
|
|
||||||
public final class XEdgeNGramTokenFilter extends TokenFilter {
|
|
||||||
|
|
||||||
static {
|
|
||||||
// LUCENE MONITOR: this should be in Lucene 4.4 copied from Revision: 1492640.
|
|
||||||
assert Lucene.VERSION == Version.LUCENE_43 : "Elasticsearch has upgraded to Lucene Version: [" + Lucene.VERSION + "] this class should be removed";
|
|
||||||
}
|
|
||||||
|
|
||||||
public static final Side DEFAULT_SIDE = Side.FRONT;
|
|
||||||
public static final int DEFAULT_MAX_GRAM_SIZE = 1;
|
|
||||||
public static final int DEFAULT_MIN_GRAM_SIZE = 1;
|
|
||||||
|
|
||||||
/** Specifies which side of the input the n-gram should be generated from */
|
|
||||||
public static enum Side {
|
|
||||||
|
|
||||||
/** Get the n-gram from the front of the input */
|
|
||||||
FRONT {
|
|
||||||
@Override
|
|
||||||
public String getLabel() { return "front"; }
|
|
||||||
},
|
|
||||||
|
|
||||||
/** Get the n-gram from the end of the input */
|
|
||||||
@Deprecated
|
|
||||||
BACK {
|
|
||||||
@Override
|
|
||||||
public String getLabel() { return "back"; }
|
|
||||||
};
|
|
||||||
|
|
||||||
public abstract String getLabel();
|
|
||||||
|
|
||||||
// Get the appropriate Side from a string
|
|
||||||
public static Side getSide(String sideName) {
|
|
||||||
if (FRONT.getLabel().equals(sideName)) {
|
|
||||||
return FRONT;
|
|
||||||
}
|
|
||||||
if (BACK.getLabel().equals(sideName)) {
|
|
||||||
return BACK;
|
|
||||||
}
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private final XCharacterUtils charUtils;
|
|
||||||
private final int minGram;
|
|
||||||
private final int maxGram;
|
|
||||||
private Side side;
|
|
||||||
private char[] curTermBuffer;
|
|
||||||
private int curTermLength;
|
|
||||||
private int curCodePointCount;
|
|
||||||
private int curGramSize;
|
|
||||||
private int tokStart;
|
|
||||||
private int tokEnd;
|
|
||||||
private int savePosIncr;
|
|
||||||
private int savePosLen;
|
|
||||||
|
|
||||||
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
|
|
||||||
private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
|
|
||||||
private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
|
|
||||||
private final PositionLengthAttribute posLenAtt = addAttribute(PositionLengthAttribute.class);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Creates XEdgeNGramTokenFilter that can generate n-grams in the sizes of the given range
|
|
||||||
*
|
|
||||||
* @param version the <a href="#version">Lucene match version</a>
|
|
||||||
* @param input {@link TokenStream} holding the input to be tokenized
|
|
||||||
* @param side the {@link Side} from which to chop off an n-gram
|
|
||||||
* @param minGram the smallest n-gram to generate
|
|
||||||
* @param maxGram the largest n-gram to generate
|
|
||||||
*/
|
|
||||||
@Deprecated
|
|
||||||
public XEdgeNGramTokenFilter(Version version, TokenStream input, Side side, int minGram, int maxGram) {
|
|
||||||
super(input);
|
|
||||||
|
|
||||||
if (version == null) {
|
|
||||||
throw new IllegalArgumentException("version must not be null");
|
|
||||||
}
|
|
||||||
|
|
||||||
if (side == Side.BACK) {
|
|
||||||
throw new IllegalArgumentException("Side.BACK is not supported anymore as of Lucene 4.4, use ReverseStringFilter up-front and afterward");
|
|
||||||
}
|
|
||||||
|
|
||||||
if (side == null) {
|
|
||||||
throw new IllegalArgumentException("sideLabel must be either front or back");
|
|
||||||
}
|
|
||||||
|
|
||||||
if (minGram < 1) {
|
|
||||||
throw new IllegalArgumentException("minGram must be greater than zero");
|
|
||||||
}
|
|
||||||
|
|
||||||
if (minGram > maxGram) {
|
|
||||||
throw new IllegalArgumentException("minGram must not be greater than maxGram");
|
|
||||||
}
|
|
||||||
|
|
||||||
this.charUtils = XCharacterUtils.getInstance(version);
|
|
||||||
this.minGram = minGram;
|
|
||||||
this.maxGram = maxGram;
|
|
||||||
this.side = side;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Creates XEdgeNGramTokenFilter that can generate n-grams in the sizes of the given range
|
|
||||||
*
|
|
||||||
* @param version the <a href="#version">Lucene match version</a>
|
|
||||||
* @param input {@link TokenStream} holding the input to be tokenized
|
|
||||||
* @param sideLabel the name of the {@link Side} from which to chop off an n-gram
|
|
||||||
* @param minGram the smallest n-gram to generate
|
|
||||||
* @param maxGram the largest n-gram to generate
|
|
||||||
*/
|
|
||||||
@Deprecated
|
|
||||||
public XEdgeNGramTokenFilter(Version version, TokenStream input, String sideLabel, int minGram, int maxGram) {
|
|
||||||
this(version, input, Side.getSide(sideLabel), minGram, maxGram);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Creates XEdgeNGramTokenFilter that can generate n-grams in the sizes of the given range
|
|
||||||
*
|
|
||||||
* @param version the <a href="#version">Lucene match version</a>
|
|
||||||
* @param input {@link TokenStream} holding the input to be tokenized
|
|
||||||
* @param minGram the smallest n-gram to generate
|
|
||||||
* @param maxGram the largest n-gram to generate
|
|
||||||
*/
|
|
||||||
public XEdgeNGramTokenFilter(Version version, TokenStream input, int minGram, int maxGram) {
|
|
||||||
this(version, input, Side.FRONT, minGram, maxGram);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public final boolean incrementToken() throws IOException {
|
|
||||||
while (true) {
|
|
||||||
if (curTermBuffer == null) {
|
|
||||||
if (!input.incrementToken()) {
|
|
||||||
return false;
|
|
||||||
} else {
|
|
||||||
curTermBuffer = termAtt.buffer().clone();
|
|
||||||
curTermLength = termAtt.length();
|
|
||||||
curCodePointCount = charUtils.codePointCount(termAtt);
|
|
||||||
curGramSize = minGram;
|
|
||||||
tokStart = offsetAtt.startOffset();
|
|
||||||
tokEnd = offsetAtt.endOffset();
|
|
||||||
savePosIncr += posIncrAtt.getPositionIncrement();
|
|
||||||
savePosLen = posLenAtt.getPositionLength();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (curGramSize <= maxGram) { // if we have hit the end of our n-gram size range, quit
|
|
||||||
if (curGramSize <= curCodePointCount) { // if the remaining input is too short, we can't generate any n-grams
|
|
||||||
// grab gramSize chars from front or back
|
|
||||||
final int start = side == Side.FRONT ? 0 : charUtils.offsetByCodePoints(curTermBuffer, 0, curTermLength, curTermLength, -curGramSize);
|
|
||||||
final int end = charUtils.offsetByCodePoints(curTermBuffer, 0, curTermLength, start, curGramSize);
|
|
||||||
clearAttributes();
|
|
||||||
offsetAtt.setOffset(tokStart, tokEnd);
|
|
||||||
// first ngram gets increment, others don't
|
|
||||||
if (curGramSize == minGram) {
|
|
||||||
posIncrAtt.setPositionIncrement(savePosIncr);
|
|
||||||
savePosIncr = 0;
|
|
||||||
} else {
|
|
||||||
posIncrAtt.setPositionIncrement(0);
|
|
||||||
}
|
|
||||||
posLenAtt.setPositionLength(savePosLen);
|
|
||||||
termAtt.copyBuffer(curTermBuffer, start, end - start);
|
|
||||||
curGramSize++;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
curTermBuffer = null;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void reset() throws IOException {
|
|
||||||
super.reset();
|
|
||||||
curTermBuffer = null;
|
|
||||||
savePosIncr = 0;
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,77 +0,0 @@
|
||||||
package org.apache.lucene.analysis.ngram;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
|
||||||
* this work for additional information regarding copyright ownership.
|
|
||||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
|
||||||
* (the "License"); you may not use this file except in compliance with
|
|
||||||
* the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
import org.elasticsearch.common.lucene.Lucene;
|
|
||||||
|
|
||||||
import java.io.Reader;
|
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Tokenizer;
|
|
||||||
import org.apache.lucene.util.Version;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Tokenizes the input from an edge into n-grams of given size(s).
|
|
||||||
* <p>
|
|
||||||
* This {@link Tokenizer} create n-grams from the beginning edge or ending edge of a input token.
|
|
||||||
* <p><a name="version" /> As of Lucene 4.4, this tokenizer<ul>
|
|
||||||
* <li>can handle <code>maxGram</code> larger than 1024 chars, but beware that this will result in increased memory usage
|
|
||||||
* <li>doesn't trim the input,
|
|
||||||
* <li>sets position increments equal to 1 instead of 1 for the first token and 0 for all other ones
|
|
||||||
* <li>doesn't support backward n-grams anymore.
|
|
||||||
* <li>supports {@link #isTokenChar(int) pre-tokenization},
|
|
||||||
* <li>correctly handles supplementary characters.
|
|
||||||
* </ul>
|
|
||||||
* <p>Although <b style="color:red">highly</b> discouraged, it is still possible
|
|
||||||
* to use the old behavior through {@link Lucene43XEdgeXNGramTokenizer}.
|
|
||||||
*/
|
|
||||||
public class XEdgeNGramTokenizer extends XNGramTokenizer {
|
|
||||||
|
|
||||||
static {
|
|
||||||
// LUCENE MONITOR: this should be in Lucene 4.4 copied from Revision: 1492640.
|
|
||||||
assert Lucene.VERSION == Version.LUCENE_43 : "Elasticsearch has upgraded to Lucene Version: [" + Lucene.VERSION + "] this class should be removed";
|
|
||||||
}
|
|
||||||
|
|
||||||
public static final int DEFAULT_MAX_GRAM_SIZE = 1;
|
|
||||||
public static final int DEFAULT_MIN_GRAM_SIZE = 1;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Creates XEdgeXNGramTokenizer that can generate n-grams in the sizes of the given range
|
|
||||||
*
|
|
||||||
* @param version the <a href="#version">Lucene match version</a>
|
|
||||||
* @param input {@link Reader} holding the input to be tokenized
|
|
||||||
* @param minGram the smallest n-gram to generate
|
|
||||||
* @param maxGram the largest n-gram to generate
|
|
||||||
*/
|
|
||||||
public XEdgeNGramTokenizer(Version version, Reader input, int minGram, int maxGram) {
|
|
||||||
super(version, input, minGram, maxGram, true);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Creates XEdgeXNGramTokenizer that can generate n-grams in the sizes of the given range
|
|
||||||
*
|
|
||||||
* @param version the <a href="#version">Lucene match version</a>
|
|
||||||
* @param factory {@link org.apache.lucene.util.AttributeSource.AttributeFactory} to use
|
|
||||||
* @param input {@link Reader} holding the input to be tokenized
|
|
||||||
* @param minGram the smallest n-gram to generate
|
|
||||||
* @param maxGram the largest n-gram to generate
|
|
||||||
*/
|
|
||||||
public XEdgeNGramTokenizer(Version version, AttributeFactory factory, Reader input, int minGram, int maxGram) {
|
|
||||||
super(version, factory, input, minGram, maxGram, true);
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
|
@ -1,281 +0,0 @@
|
||||||
package org.apache.lucene.analysis.ngram;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
|
||||||
* this work for additional information regarding copyright ownership.
|
|
||||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
|
||||||
* (the "License"); you may not use this file except in compliance with
|
|
||||||
* the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
import org.elasticsearch.common.lucene.Lucene;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.io.Reader;
|
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Tokenizer;
|
|
||||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
|
||||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
|
||||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
|
||||||
import org.apache.lucene.util.ArrayUtil;
|
|
||||||
import org.apache.lucene.util.Version;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Old version of {@link EdgeNGramTokenizer} which doesn't handle correctly
|
|
||||||
* supplementary characters.
|
|
||||||
*/
|
|
||||||
@Deprecated
|
|
||||||
public final class XLucene43EdgeNGramTokenizer extends Tokenizer {
|
|
||||||
|
|
||||||
static {
|
|
||||||
// LUCENE MONITOR: this should be in Lucene 4.4 copied from Revision: 1492640.
|
|
||||||
assert Lucene.VERSION == Version.LUCENE_43 : "Elasticsearch has upgraded to Lucene Version: [" + Lucene.VERSION + "] this class should be removed";
|
|
||||||
}
|
|
||||||
|
|
||||||
public static final Side DEFAULT_SIDE = Side.FRONT;
|
|
||||||
public static final int DEFAULT_MAX_GRAM_SIZE = 1;
|
|
||||||
public static final int DEFAULT_MIN_GRAM_SIZE = 1;
|
|
||||||
|
|
||||||
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
|
|
||||||
private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
|
|
||||||
private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
|
|
||||||
|
|
||||||
/** Specifies which side of the input the n-gram should be generated from */
|
|
||||||
public static enum Side {
|
|
||||||
|
|
||||||
/** Get the n-gram from the front of the input */
|
|
||||||
FRONT {
|
|
||||||
@Override
|
|
||||||
public String getLabel() { return "front"; }
|
|
||||||
},
|
|
||||||
|
|
||||||
/** Get the n-gram from the end of the input */
|
|
||||||
BACK {
|
|
||||||
@Override
|
|
||||||
public String getLabel() { return "back"; }
|
|
||||||
};
|
|
||||||
|
|
||||||
public abstract String getLabel();
|
|
||||||
|
|
||||||
// Get the appropriate Side from a string
|
|
||||||
public static Side getSide(String sideName) {
|
|
||||||
if (FRONT.getLabel().equals(sideName)) {
|
|
||||||
return FRONT;
|
|
||||||
}
|
|
||||||
if (BACK.getLabel().equals(sideName)) {
|
|
||||||
return BACK;
|
|
||||||
}
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private int minGram;
|
|
||||||
private int maxGram;
|
|
||||||
private int gramSize;
|
|
||||||
private Side side;
|
|
||||||
private boolean started;
|
|
||||||
private int inLen; // length of the input AFTER trim()
|
|
||||||
private int charsRead; // length of the input
|
|
||||||
private String inStr;
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given range
|
|
||||||
*
|
|
||||||
* @param version the <a href="#version">Lucene match version</a>
|
|
||||||
* @param input {@link Reader} holding the input to be tokenized
|
|
||||||
* @param side the {@link Side} from which to chop off an n-gram
|
|
||||||
* @param minGram the smallest n-gram to generate
|
|
||||||
* @param maxGram the largest n-gram to generate
|
|
||||||
*/
|
|
||||||
@Deprecated
|
|
||||||
public XLucene43EdgeNGramTokenizer(Version version, Reader input, Side side, int minGram, int maxGram) {
|
|
||||||
super(input);
|
|
||||||
init(version, side, minGram, maxGram);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given range
|
|
||||||
*
|
|
||||||
* @param version the <a href="#version">Lucene match version</a>
|
|
||||||
* @param factory {@link org.apache.lucene.util.AttributeSource.AttributeFactory} to use
|
|
||||||
* @param input {@link Reader} holding the input to be tokenized
|
|
||||||
* @param side the {@link Side} from which to chop off an n-gram
|
|
||||||
* @param minGram the smallest n-gram to generate
|
|
||||||
* @param maxGram the largest n-gram to generate
|
|
||||||
*/
|
|
||||||
@Deprecated
|
|
||||||
public XLucene43EdgeNGramTokenizer(Version version, AttributeFactory factory, Reader input, Side side, int minGram, int maxGram) {
|
|
||||||
super(factory, input);
|
|
||||||
init(version, side, minGram, maxGram);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given range
|
|
||||||
*
|
|
||||||
* @param version the <a href="#version">Lucene match version</a>
|
|
||||||
* @param input {@link Reader} holding the input to be tokenized
|
|
||||||
* @param sideLabel the name of the {@link Side} from which to chop off an n-gram
|
|
||||||
* @param minGram the smallest n-gram to generate
|
|
||||||
* @param maxGram the largest n-gram to generate
|
|
||||||
*/
|
|
||||||
@Deprecated
|
|
||||||
public XLucene43EdgeNGramTokenizer(Version version, Reader input, String sideLabel, int minGram, int maxGram) {
|
|
||||||
this(version, input, Side.getSide(sideLabel), minGram, maxGram);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given range
|
|
||||||
*
|
|
||||||
* @param version the <a href="#version">Lucene match version</a>
|
|
||||||
* @param factory {@link org.apache.lucene.util.AttributeSource.AttributeFactory} to use
|
|
||||||
* @param input {@link Reader} holding the input to be tokenized
|
|
||||||
* @param sideLabel the name of the {@link Side} from which to chop off an n-gram
|
|
||||||
* @param minGram the smallest n-gram to generate
|
|
||||||
* @param maxGram the largest n-gram to generate
|
|
||||||
*/
|
|
||||||
@Deprecated
|
|
||||||
public XLucene43EdgeNGramTokenizer(Version version, AttributeFactory factory, Reader input, String sideLabel, int minGram, int maxGram) {
|
|
||||||
this(version, factory, input, Side.getSide(sideLabel), minGram, maxGram);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given range
|
|
||||||
*
|
|
||||||
* @param version the <a href="#version">Lucene match version</a>
|
|
||||||
* @param input {@link Reader} holding the input to be tokenized
|
|
||||||
* @param minGram the smallest n-gram to generate
|
|
||||||
* @param maxGram the largest n-gram to generate
|
|
||||||
*/
|
|
||||||
public XLucene43EdgeNGramTokenizer(Version version, Reader input, int minGram, int maxGram) {
|
|
||||||
this(version, input, Side.FRONT, minGram, maxGram);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given range
|
|
||||||
*
|
|
||||||
* @param version the <a href="#version">Lucene match version</a>
|
|
||||||
* @param factory {@link org.apache.lucene.util.AttributeSource.AttributeFactory} to use
|
|
||||||
* @param input {@link Reader} holding the input to be tokenized
|
|
||||||
* @param minGram the smallest n-gram to generate
|
|
||||||
* @param maxGram the largest n-gram to generate
|
|
||||||
*/
|
|
||||||
public XLucene43EdgeNGramTokenizer(Version version, AttributeFactory factory, Reader input, int minGram, int maxGram) {
|
|
||||||
this(version, factory, input, Side.FRONT, minGram, maxGram);
|
|
||||||
}
|
|
||||||
|
|
||||||
private void init(Version version, Side side, int minGram, int maxGram) {
|
|
||||||
if (version == null) {
|
|
||||||
throw new IllegalArgumentException("version must not be null");
|
|
||||||
}
|
|
||||||
|
|
||||||
if (side == null) {
|
|
||||||
throw new IllegalArgumentException("sideLabel must be either front or back");
|
|
||||||
}
|
|
||||||
|
|
||||||
if (minGram < 1) {
|
|
||||||
throw new IllegalArgumentException("minGram must be greater than zero");
|
|
||||||
}
|
|
||||||
|
|
||||||
if (minGram > maxGram) {
|
|
||||||
throw new IllegalArgumentException("minGram must not be greater than maxGram");
|
|
||||||
}
|
|
||||||
|
|
||||||
maxGram = Math.min(maxGram, 1024);
|
|
||||||
|
|
||||||
this.minGram = minGram;
|
|
||||||
this.maxGram = maxGram;
|
|
||||||
this.side = side;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Returns the next token in the stream, or null at EOS. */
|
|
||||||
@Override
|
|
||||||
public boolean incrementToken() throws IOException {
|
|
||||||
clearAttributes();
|
|
||||||
// if we are just starting, read the whole input
|
|
||||||
if (!started) {
|
|
||||||
started = true;
|
|
||||||
gramSize = minGram;
|
|
||||||
final int limit = side == Side.FRONT ? maxGram : 1024;
|
|
||||||
char[] chars = new char[Math.min(1024, limit)];
|
|
||||||
charsRead = 0;
|
|
||||||
// TODO: refactor to a shared readFully somewhere:
|
|
||||||
boolean exhausted = false;
|
|
||||||
while (charsRead < limit) {
|
|
||||||
final int inc = input.read(chars, charsRead, chars.length-charsRead);
|
|
||||||
if (inc == -1) {
|
|
||||||
exhausted = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
charsRead += inc;
|
|
||||||
if (charsRead == chars.length && charsRead < limit) {
|
|
||||||
chars = ArrayUtil.grow(chars);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
inStr = new String(chars, 0, charsRead);
|
|
||||||
inStr = inStr.trim();
|
|
||||||
|
|
||||||
if (!exhausted) {
|
|
||||||
// Read extra throwaway chars so that on end() we
|
|
||||||
// report the correct offset:
|
|
||||||
char[] throwaway = new char[1024];
|
|
||||||
while(true) {
|
|
||||||
final int inc = input.read(throwaway, 0, throwaway.length);
|
|
||||||
if (inc == -1) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
charsRead += inc;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
inLen = inStr.length();
|
|
||||||
if (inLen == 0) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
posIncrAtt.setPositionIncrement(1);
|
|
||||||
} else {
|
|
||||||
posIncrAtt.setPositionIncrement(0);
|
|
||||||
}
|
|
||||||
|
|
||||||
// if the remaining input is too short, we can't generate any n-grams
|
|
||||||
if (gramSize > inLen) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
// if we have hit the end of our n-gram size range, quit
|
|
||||||
if (gramSize > maxGram || gramSize > inLen) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
// grab gramSize chars from front or back
|
|
||||||
int start = side == Side.FRONT ? 0 : inLen - gramSize;
|
|
||||||
int end = start + gramSize;
|
|
||||||
termAtt.setEmpty().append(inStr, start, end);
|
|
||||||
offsetAtt.setOffset(correctOffset(start), correctOffset(end));
|
|
||||||
gramSize++;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void end() {
|
|
||||||
// set final offset
|
|
||||||
final int finalOffset = correctOffset(charsRead);
|
|
||||||
this.offsetAtt.setOffset(finalOffset, finalOffset);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void reset() throws IOException {
|
|
||||||
super.reset();
|
|
||||||
started = false;
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,164 +0,0 @@
|
||||||
package org.apache.lucene.analysis.ngram;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
|
||||||
* this work for additional information regarding copyright ownership.
|
|
||||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
|
||||||
* (the "License"); you may not use this file except in compliance with
|
|
||||||
* the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
import org.apache.lucene.util.Version;
|
|
||||||
import org.elasticsearch.common.lucene.Lucene;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.io.Reader;
|
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Tokenizer;
|
|
||||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
|
||||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Old broken version of {@link NGramTokenizer}.
|
|
||||||
*/
|
|
||||||
@Deprecated
|
|
||||||
public final class XLucene43NGramTokenizer extends Tokenizer {
|
|
||||||
|
|
||||||
static {
|
|
||||||
// LUCENE MONITOR: this should be in Lucene 4.4 copied from Revision: 1492640.
|
|
||||||
assert Lucene.VERSION == Version.LUCENE_43 : "Elasticsearch has upgraded to Lucene Version: [" + Lucene.VERSION + "] this class should be removed";
|
|
||||||
}
|
|
||||||
|
|
||||||
public static final int DEFAULT_MIN_NGRAM_SIZE = 1;
|
|
||||||
public static final int DEFAULT_MAX_NGRAM_SIZE = 2;
|
|
||||||
|
|
||||||
private int minGram, maxGram;
|
|
||||||
private int gramSize;
|
|
||||||
private int pos;
|
|
||||||
private int inLen; // length of the input AFTER trim()
|
|
||||||
private int charsRead; // length of the input
|
|
||||||
private String inStr;
|
|
||||||
private boolean started;
|
|
||||||
|
|
||||||
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
|
|
||||||
private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Creates NGramTokenizer with given min and max n-grams.
|
|
||||||
* @param input {@link Reader} holding the input to be tokenized
|
|
||||||
* @param minGram the smallest n-gram to generate
|
|
||||||
* @param maxGram the largest n-gram to generate
|
|
||||||
*/
|
|
||||||
public XLucene43NGramTokenizer(Reader input, int minGram, int maxGram) {
|
|
||||||
super(input);
|
|
||||||
init(minGram, maxGram);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Creates NGramTokenizer with given min and max n-grams.
|
|
||||||
* @param factory {@link org.apache.lucene.util.AttributeSource.AttributeFactory} to use
|
|
||||||
* @param input {@link Reader} holding the input to be tokenized
|
|
||||||
* @param minGram the smallest n-gram to generate
|
|
||||||
* @param maxGram the largest n-gram to generate
|
|
||||||
*/
|
|
||||||
public XLucene43NGramTokenizer(AttributeFactory factory, Reader input, int minGram, int maxGram) {
|
|
||||||
super(factory, input);
|
|
||||||
init(minGram, maxGram);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Creates NGramTokenizer with default min and max n-grams.
|
|
||||||
* @param input {@link Reader} holding the input to be tokenized
|
|
||||||
*/
|
|
||||||
public XLucene43NGramTokenizer(Reader input) {
|
|
||||||
this(input, DEFAULT_MIN_NGRAM_SIZE, DEFAULT_MAX_NGRAM_SIZE);
|
|
||||||
}
|
|
||||||
|
|
||||||
private void init(int minGram, int maxGram) {
|
|
||||||
if (minGram < 1) {
|
|
||||||
throw new IllegalArgumentException("minGram must be greater than zero");
|
|
||||||
}
|
|
||||||
if (minGram > maxGram) {
|
|
||||||
throw new IllegalArgumentException("minGram must not be greater than maxGram");
|
|
||||||
}
|
|
||||||
this.minGram = minGram;
|
|
||||||
this.maxGram = maxGram;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Returns the next token in the stream, or null at EOS. */
|
|
||||||
@Override
|
|
||||||
public boolean incrementToken() throws IOException {
|
|
||||||
clearAttributes();
|
|
||||||
if (!started) {
|
|
||||||
started = true;
|
|
||||||
gramSize = minGram;
|
|
||||||
char[] chars = new char[1024];
|
|
||||||
charsRead = 0;
|
|
||||||
// TODO: refactor to a shared readFully somewhere:
|
|
||||||
while (charsRead < chars.length) {
|
|
||||||
int inc = input.read(chars, charsRead, chars.length-charsRead);
|
|
||||||
if (inc == -1) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
charsRead += inc;
|
|
||||||
}
|
|
||||||
inStr = new String(chars, 0, charsRead).trim(); // remove any trailing empty strings
|
|
||||||
|
|
||||||
if (charsRead == chars.length) {
|
|
||||||
// Read extra throwaway chars so that on end() we
|
|
||||||
// report the correct offset:
|
|
||||||
char[] throwaway = new char[1024];
|
|
||||||
while(true) {
|
|
||||||
final int inc = input.read(throwaway, 0, throwaway.length);
|
|
||||||
if (inc == -1) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
charsRead += inc;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
inLen = inStr.length();
|
|
||||||
if (inLen == 0) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (pos+gramSize > inLen) { // if we hit the end of the string
|
|
||||||
pos = 0; // reset to beginning of string
|
|
||||||
gramSize++; // increase n-gram size
|
|
||||||
if (gramSize > maxGram) // we are done
|
|
||||||
return false;
|
|
||||||
if (pos+gramSize > inLen)
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
int oldPos = pos;
|
|
||||||
pos++;
|
|
||||||
termAtt.setEmpty().append(inStr, oldPos, oldPos+gramSize);
|
|
||||||
offsetAtt.setOffset(correctOffset(oldPos), correctOffset(oldPos+gramSize));
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void end() {
|
|
||||||
// set final offset
|
|
||||||
final int finalOffset = correctOffset(charsRead);
|
|
||||||
this.offsetAtt.setOffset(finalOffset, finalOffset);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void reset() throws IOException {
|
|
||||||
super.reset();
|
|
||||||
started = false;
|
|
||||||
pos = 0;
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,158 +0,0 @@
|
||||||
package org.apache.lucene.analysis.ngram;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
|
||||||
* this work for additional information regarding copyright ownership.
|
|
||||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
|
||||||
* (the "License"); you may not use this file except in compliance with
|
|
||||||
* the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
import org.elasticsearch.common.lucene.Lucene;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
|
|
||||||
import org.apache.lucene.analysis.TokenFilter;
|
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
|
||||||
import org.apache.lucene.analysis.miscellaneous.LengthFilter;
|
|
||||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
|
||||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
|
||||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
|
||||||
import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
|
|
||||||
import org.apache.lucene.analysis.util.XCharacterUtils;
|
|
||||||
import org.apache.lucene.util.Version;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Tokenizes the input into n-grams of the given size(s).
|
|
||||||
* <a name="version"/>
|
|
||||||
* <p>You must specify the required {@link Version} compatibility when
|
|
||||||
* creating a {@link XNGramTokenFilter}. As of Lucene 4.4, this token filter:<ul>
|
|
||||||
* <li>handles supplementary characters correctly,</li>
|
|
||||||
* <li>emits all n-grams for the same token at the same position,</li>
|
|
||||||
* <li>does not modify offsets,</li>
|
|
||||||
* <li>sorts n-grams by their offset in the original token first, then
|
|
||||||
* increasing length (meaning that "abc" will give "a", "ab", "abc", "b", "bc",
|
|
||||||
* "c").</li></ul>
|
|
||||||
* <p>You can make this filter use the old behavior by providing a version &lt;
|
|
||||||
* {@link Version#LUCENE_44} in the constructor but this is not recommended as
|
|
||||||
* it will lead to broken {@link TokenStream}s that will cause highlighting
|
|
||||||
* bugs.
|
|
||||||
* <p>If you were using this {@link TokenFilter} to perform partial highlighting,
|
|
||||||
* this won't work anymore since this filter doesn't update offsets. You should
|
|
||||||
* modify your analysis chain to use {@link NGramTokenizer}, and potentially
|
|
||||||
* override {@link NGramTokenizer#isTokenChar(int)} to perform pre-tokenization.
|
|
||||||
*/
|
|
||||||
public final class XNGramTokenFilter extends TokenFilter {
|
|
||||||
|
|
||||||
static {
|
|
||||||
// LUCENE MONITOR: this should be in Lucene 4.4 copied from Revision: 1492640.
|
|
||||||
assert Lucene.VERSION == Version.LUCENE_43 : "Elasticsearch has upgraded to Lucene Version: [" + Lucene.VERSION + "] this class should be removed";
|
|
||||||
}
|
|
||||||
|
|
||||||
public static final int DEFAULT_MIN_NGRAM_SIZE = 1;
|
|
||||||
public static final int DEFAULT_MAX_NGRAM_SIZE = 2;
|
|
||||||
|
|
||||||
private final int minGram, maxGram;
|
|
||||||
|
|
||||||
private char[] curTermBuffer;
|
|
||||||
private int curTermLength;
|
|
||||||
private int curCodePointCount;
|
|
||||||
private int curGramSize;
|
|
||||||
private int curPos;
|
|
||||||
private int curPosInc, curPosLen;
|
|
||||||
private int tokStart;
|
|
||||||
private int tokEnd;
|
|
||||||
|
|
||||||
private final XCharacterUtils charUtils;
|
|
||||||
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
|
|
||||||
private final PositionIncrementAttribute posIncAtt;
|
|
||||||
private final PositionLengthAttribute posLenAtt;
|
|
||||||
private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Creates XNGramTokenFilter with given min and max n-grams.
|
|
||||||
* @param version Lucene version to enable correct position increments.
|
|
||||||
* See <a href="#version">above</a> for details.
|
|
||||||
* @param input {@link TokenStream} holding the input to be tokenized
|
|
||||||
* @param minGram the smallest n-gram to generate
|
|
||||||
* @param maxGram the largest n-gram to generate
|
|
||||||
*/
|
|
||||||
public XNGramTokenFilter(Version version, TokenStream input, int minGram, int maxGram) {
|
|
||||||
super(new LengthFilter(true, input, minGram, Integer.MAX_VALUE));
|
|
||||||
this.charUtils = XCharacterUtils.getInstance(version);
|
|
||||||
if (minGram < 1) {
|
|
||||||
throw new IllegalArgumentException("minGram must be greater than zero");
|
|
||||||
}
|
|
||||||
if (minGram > maxGram) {
|
|
||||||
throw new IllegalArgumentException("minGram must not be greater than maxGram");
|
|
||||||
}
|
|
||||||
this.minGram = minGram;
|
|
||||||
this.maxGram = maxGram;
|
|
||||||
posIncAtt = addAttribute(PositionIncrementAttribute.class);
|
|
||||||
posLenAtt = addAttribute(PositionLengthAttribute.class);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Creates XNGramTokenFilter with default min and max n-grams.
|
|
||||||
* @param version Lucene version to enable correct position increments.
|
|
||||||
* See <a href="#version">above</a> for details.
|
|
||||||
* @param input {@link TokenStream} holding the input to be tokenized
|
|
||||||
*/
|
|
||||||
public XNGramTokenFilter(Version version, TokenStream input) {
|
|
||||||
this(version, input, DEFAULT_MIN_NGRAM_SIZE, DEFAULT_MAX_NGRAM_SIZE);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Returns the next token in the stream, or null at EOS. */
|
|
||||||
@Override
|
|
||||||
public final boolean incrementToken() throws IOException {
|
|
||||||
while (true) {
|
|
||||||
if (curTermBuffer == null) {
|
|
||||||
if (!input.incrementToken()) {
|
|
||||||
return false;
|
|
||||||
} else {
|
|
||||||
curTermBuffer = termAtt.buffer().clone();
|
|
||||||
curTermLength = termAtt.length();
|
|
||||||
curCodePointCount = charUtils.codePointCount(termAtt);
|
|
||||||
curGramSize = minGram;
|
|
||||||
curPos = 0;
|
|
||||||
curPosInc = posIncAtt.getPositionIncrement();
|
|
||||||
curPosLen = posLenAtt.getPositionLength();
|
|
||||||
tokStart = offsetAtt.startOffset();
|
|
||||||
tokEnd = offsetAtt.endOffset();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (curGramSize > maxGram || (curPos + curGramSize) > curCodePointCount) {
|
|
||||||
++curPos;
|
|
||||||
curGramSize = minGram;
|
|
||||||
}
|
|
||||||
if ((curPos + curGramSize) <= curCodePointCount) {
|
|
||||||
clearAttributes();
|
|
||||||
final int start = charUtils.offsetByCodePoints(curTermBuffer, 0, curTermLength, 0, curPos);
|
|
||||||
final int end = charUtils.offsetByCodePoints(curTermBuffer, 0, curTermLength, start, curGramSize);
|
|
||||||
termAtt.copyBuffer(curTermBuffer, start, end - start);
|
|
||||||
posIncAtt.setPositionIncrement(curPosInc);
|
|
||||||
curPosInc = 0;
|
|
||||||
posLenAtt.setPositionLength(curPosLen);
|
|
||||||
offsetAtt.setOffset(tokStart, tokEnd);
|
|
||||||
curGramSize++;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
curTermBuffer = null;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void reset() throws IOException {
|
|
||||||
super.reset();
|
|
||||||
curTermBuffer = null;
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,248 +0,0 @@
|
||||||
package org.apache.lucene.analysis.ngram;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
|
||||||
* this work for additional information regarding copyright ownership.
|
|
||||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
|
||||||
* (the "License"); you may not use this file except in compliance with
|
|
||||||
* the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
import org.elasticsearch.common.lucene.Lucene;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.io.Reader;
|
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Tokenizer;
|
|
||||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
|
||||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
|
||||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
|
||||||
import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
|
|
||||||
import org.apache.lucene.analysis.util.XCharacterUtils;
|
|
||||||
import org.apache.lucene.util.Version;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Tokenizes the input into n-grams of the given size(s).
|
|
||||||
* <p>Contrary to {@link NGramTokenFilter}, this class sets offsets so
|
|
||||||
* that characters between startOffset and endOffset in the original stream are
|
|
||||||
* the same as the term chars.
|
|
||||||
* <p>For example, "abcde" would be tokenized as (minGram=2, maxGram=3):
|
|
||||||
* <table>
|
|
||||||
* <tr><th>Term</th><td>ab</td><td>abc</td><td>bc</td><td>bcd</td><td>cd</td><td>cde</td><td>de</td></tr>
|
|
||||||
* <tr><th>Position increment</th><td>1</td><td>1</td><td>1</td><td>1</td><td>1</td><td>1</td><td>1</td></tr>
|
|
||||||
* <tr><th>Position length</th><td>1</td><td>1</td><td>1</td><td>1</td><td>1</td><td>1</td><td>1</td></tr>
|
|
||||||
* <tr><th>Offsets</th><td>[0,2[</td><td>[0,3[</td><td>[1,3[</td><td>[1,4[</td><td>[2,4[</td><td>[2,5[</td><td>[3,5[</td></tr>
|
|
||||||
* </table>
|
|
||||||
* <a name="version"/>
|
|
||||||
* <p>This tokenizer changed a lot in Lucene 4.4 in order to:<ul>
|
|
||||||
* <li>tokenize in a streaming fashion to support streams which are larger
|
|
||||||
* than 1024 chars (limit of the previous version),
|
|
||||||
* <li>count grams based on unicode code points instead of java chars (and
|
|
||||||
* never split in the middle of surrogate pairs),
|
|
||||||
* <li>give the ability to {@link #isTokenChar(int) pre-tokenize} the stream
|
|
||||||
* before computing n-grams.</ul>
|
|
||||||
* <p>Additionally, this class doesn't trim trailing whitespaces and emits
|
|
||||||
* tokens in a different order, tokens are now emitted by increasing start
|
|
||||||
* offsets while they used to be emitted by increasing lengths (which prevented
|
|
||||||
* from supporting large input streams).
|
|
||||||
* <p>Although <b style="color:red">highly</b> discouraged, it is still possible
|
|
||||||
* to use the old behavior through {@link Lucene43NGramTokenizer}.
|
|
||||||
*/
|
|
||||||
// non-final to allow for overriding isTokenChar, but all other methods should be final
|
|
||||||
public class XNGramTokenizer extends Tokenizer {
|
|
||||||
|
|
||||||
static {
|
|
||||||
// LUCENE MONITOR: this should be in Lucene 4.4 copied from Revision: 1492640.
|
|
||||||
assert Lucene.VERSION == Version.LUCENE_43 : "Elasticsearch has upgraded to Lucene Version: [" + Lucene.VERSION + "] this class should be removed";
|
|
||||||
}
|
|
||||||
|
|
||||||
public static final int DEFAULT_MIN_NGRAM_SIZE = 1;
|
|
||||||
public static final int DEFAULT_MAX_NGRAM_SIZE = 2;
|
|
||||||
|
|
||||||
private XCharacterUtils charUtils;
|
|
||||||
private XCharacterUtils.CharacterBuffer charBuffer;
|
|
||||||
private int[] buffer; // like charBuffer, but converted to code points
|
|
||||||
private int bufferStart, bufferEnd; // remaining slice in buffer
|
|
||||||
private int offset;
|
|
||||||
private int gramSize;
|
|
||||||
private int minGram, maxGram;
|
|
||||||
private boolean exhausted;
|
|
||||||
private int lastCheckedChar; // last offset in the buffer that we checked
|
|
||||||
private int lastNonTokenChar; // last offset that we found to not be a token char
|
|
||||||
private boolean edgesOnly; // leading edges n-grams only
|
|
||||||
|
|
||||||
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
|
|
||||||
private final PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
|
|
||||||
private final PositionLengthAttribute posLenAtt = addAttribute(PositionLengthAttribute.class);
|
|
||||||
private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
|
|
||||||
|
|
||||||
XNGramTokenizer(Version version, Reader input, int minGram, int maxGram, boolean edgesOnly) {
|
|
||||||
super(input);
|
|
||||||
init(version, minGram, maxGram, edgesOnly);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Creates NGramTokenizer with given min and max n-grams.
|
|
||||||
* @param version the lucene compatibility <a href="#version">version</a>
|
|
||||||
* @param input {@link Reader} holding the input to be tokenized
|
|
||||||
* @param minGram the smallest n-gram to generate
|
|
||||||
* @param maxGram the largest n-gram to generate
|
|
||||||
*/
|
|
||||||
public XNGramTokenizer(Version version, Reader input, int minGram, int maxGram) {
|
|
||||||
this(version, input, minGram, maxGram, false);
|
|
||||||
}
|
|
||||||
|
|
||||||
XNGramTokenizer(Version version, AttributeFactory factory, Reader input, int minGram, int maxGram, boolean edgesOnly) {
|
|
||||||
super(factory, input);
|
|
||||||
init(version, minGram, maxGram, edgesOnly);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Creates NGramTokenizer with given min and max n-grams.
|
|
||||||
* @param version the lucene compatibility <a href="#version">version</a>
|
|
||||||
* @param factory {@link org.apache.lucene.util.AttributeSource.AttributeFactory} to use
|
|
||||||
* @param input {@link Reader} holding the input to be tokenized
|
|
||||||
* @param minGram the smallest n-gram to generate
|
|
||||||
* @param maxGram the largest n-gram to generate
|
|
||||||
*/
|
|
||||||
public XNGramTokenizer(Version version, AttributeFactory factory, Reader input, int minGram, int maxGram) {
|
|
||||||
this(version, factory, input, minGram, maxGram, false);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Creates NGramTokenizer with default min and max n-grams.
|
|
||||||
* @param version the lucene compatibility <a href="#version">version</a>
|
|
||||||
* @param input {@link Reader} holding the input to be tokenized
|
|
||||||
*/
|
|
||||||
public XNGramTokenizer(Version version, Reader input) {
|
|
||||||
this(version, input, DEFAULT_MIN_NGRAM_SIZE, DEFAULT_MAX_NGRAM_SIZE);
|
|
||||||
}
|
|
||||||
|
|
||||||
private void init(Version version, int minGram, int maxGram, boolean edgesOnly) {
|
|
||||||
if (!version.onOrAfter(Version.LUCENE_43)) {
|
|
||||||
throw new IllegalArgumentException("This class only works with Lucene 4.4+. To emulate the old (broken) behavior of NGramTokenizer, use Lucene43NGramTokenizer/Lucene43EdgeNGramTokenizer");
|
|
||||||
}
|
|
||||||
charUtils = version.onOrAfter(Version.LUCENE_43)
|
|
||||||
? XCharacterUtils.getInstance(version)
|
|
||||||
: XCharacterUtils.getJava4Instance();
|
|
||||||
if (minGram < 1) {
|
|
||||||
throw new IllegalArgumentException("minGram must be greater than zero");
|
|
||||||
}
|
|
||||||
if (minGram > maxGram) {
|
|
||||||
throw new IllegalArgumentException("minGram must not be greater than maxGram");
|
|
||||||
}
|
|
||||||
this.minGram = minGram;
|
|
||||||
this.maxGram = maxGram;
|
|
||||||
this.edgesOnly = edgesOnly;
|
|
||||||
charBuffer = XCharacterUtils.newCharacterBuffer(2 * maxGram + 1024); // 2 * maxGram in case all code points require 2 chars and + 1024 for buffering to not keep polling the Reader
|
|
||||||
buffer = new int[charBuffer.getBuffer().length];
|
|
||||||
// Make the term att large enough
|
|
||||||
termAtt.resizeBuffer(2 * maxGram);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public final boolean incrementToken() throws IOException {
|
|
||||||
clearAttributes();
|
|
||||||
|
|
||||||
// termination of this loop is guaranteed by the fact that every iteration
|
|
||||||
// either advances the buffer (calls consumes()) or increases gramSize
|
|
||||||
while (true) {
|
|
||||||
// compact
|
|
||||||
if (bufferStart >= bufferEnd - maxGram - 1 && !exhausted) {
|
|
||||||
System.arraycopy(buffer, bufferStart, buffer, 0, bufferEnd - bufferStart);
|
|
||||||
bufferEnd -= bufferStart;
|
|
||||||
lastCheckedChar -= bufferStart;
|
|
||||||
lastNonTokenChar -= bufferStart;
|
|
||||||
bufferStart = 0;
|
|
||||||
|
|
||||||
// fill in remaining space
|
|
||||||
exhausted = !charUtils.fill(charBuffer, input, buffer.length - bufferEnd);
|
|
||||||
// convert to code points
|
|
||||||
bufferEnd += charUtils.toCodePoints(charBuffer.getBuffer(), 0, charBuffer.getLength(), buffer, bufferEnd);
|
|
||||||
}
|
|
||||||
|
|
||||||
// should we go to the next offset?
|
|
||||||
if (gramSize > maxGram || (bufferStart + gramSize) > bufferEnd) {
|
|
||||||
if (bufferStart + 1 + minGram > bufferEnd) {
|
|
||||||
assert exhausted;
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
consume();
|
|
||||||
gramSize = minGram;
|
|
||||||
}
|
|
||||||
|
|
||||||
updateLastNonTokenChar();
|
|
||||||
|
|
||||||
// retry if the token to be emitted was going to not only contain token chars
|
|
||||||
final boolean termContainsNonTokenChar = lastNonTokenChar >= bufferStart && lastNonTokenChar < (bufferStart + gramSize);
|
|
||||||
final boolean isEdgeAndPreviousCharIsTokenChar = edgesOnly && lastNonTokenChar != bufferStart - 1;
|
|
||||||
if (termContainsNonTokenChar || isEdgeAndPreviousCharIsTokenChar) {
|
|
||||||
consume();
|
|
||||||
gramSize = minGram;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
final int length = charUtils.toChars(buffer, bufferStart, gramSize, termAtt.buffer(), 0);
|
|
||||||
termAtt.setLength(length);
|
|
||||||
posIncAtt.setPositionIncrement(1);
|
|
||||||
posLenAtt.setPositionLength(1);
|
|
||||||
offsetAtt.setOffset(correctOffset(offset), correctOffset(offset + length));
|
|
||||||
++gramSize;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private void updateLastNonTokenChar() {
|
|
||||||
final int termEnd = bufferStart + gramSize - 1;
|
|
||||||
if (termEnd > lastCheckedChar) {
|
|
||||||
for (int i = termEnd; i > lastCheckedChar; --i) {
|
|
||||||
if (!isTokenChar(buffer[i])) {
|
|
||||||
lastNonTokenChar = i;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
lastCheckedChar = termEnd;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Consume one code point. */
|
|
||||||
private void consume() {
|
|
||||||
offset += Character.charCount(buffer[bufferStart++]);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Only collect characters which satisfy this condition. */
|
|
||||||
protected boolean isTokenChar(int chr) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public final void end() {
|
|
||||||
assert bufferStart <= bufferEnd;
|
|
||||||
int endOffset = offset;
|
|
||||||
for (int i = bufferStart; i < bufferEnd; ++i) {
|
|
||||||
endOffset += Character.charCount(buffer[i]);
|
|
||||||
}
|
|
||||||
endOffset = correctOffset(endOffset);
|
|
||||||
offsetAtt.setOffset(endOffset, endOffset);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public final void reset() throws IOException {
|
|
||||||
super.reset();
|
|
||||||
bufferStart = bufferEnd = buffer.length;
|
|
||||||
lastNonTokenChar = lastCheckedChar = bufferStart - 1;
|
|
||||||
offset = 0;
|
|
||||||
gramSize = minGram;
|
|
||||||
exhausted = false;
|
|
||||||
charBuffer.reset();
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -20,7 +20,7 @@ import org.apache.lucene.index.IndexReader;
|
||||||
import org.apache.lucene.index.Term;
|
import org.apache.lucene.index.Term;
|
||||||
import org.apache.lucene.search.*;
|
import org.apache.lucene.search.*;
|
||||||
import org.apache.lucene.search.vectorhighlight.XFieldTermStack.TermInfo;
|
import org.apache.lucene.search.vectorhighlight.XFieldTermStack.TermInfo;
|
||||||
import org.apache.lucene.util.SorterTemplate;
|
import org.apache.lucene.util.InPlaceMergeSorter;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
@ -364,7 +364,7 @@ public class XFieldQuery {
|
||||||
PhraseQuery pq = (PhraseQuery)query;
|
PhraseQuery pq = (PhraseQuery)query;
|
||||||
final Term[] terms = pq.getTerms();
|
final Term[] terms = pq.getTerms();
|
||||||
final int[] positions = pq.getPositions();
|
final int[] positions = pq.getPositions();
|
||||||
new SorterTemplate() {
|
new InPlaceMergeSorter() {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected void swap(int i, int j) {
|
protected void swap(int i, int j) {
|
||||||
|
@ -381,17 +381,7 @@ public class XFieldQuery {
|
||||||
protected int compare(int i, int j) {
|
protected int compare(int i, int j) {
|
||||||
return positions[i] - positions[j];
|
return positions[i] - positions[j];
|
||||||
}
|
}
|
||||||
|
}.sort(0, terms.length);
|
||||||
@Override
|
|
||||||
protected void setPivot(int i) {
|
|
||||||
throw new UnsupportedOperationException();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
protected int comparePivot(int j) {
|
|
||||||
throw new UnsupportedOperationException();
|
|
||||||
}
|
|
||||||
}.mergeSort(0, terms.length - 1);
|
|
||||||
|
|
||||||
addToMap(pq, terms, positions, 0, subMap, pq.getSlop());
|
addToMap(pq, terms, positions, 0, subMap, pq.getSlop());
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,171 +0,0 @@
|
||||||
package org.apache.lucene.util.packed;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
|
||||||
* this work for additional information regarding copyright ownership.
|
|
||||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
|
||||||
* (the "License"); you may not use this file except in compliance with
|
|
||||||
* the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
import static org.apache.lucene.util.packed.XPackedInts.checkBlockSize;
|
|
||||||
import static org.apache.lucene.util.packed.XPackedInts.numBlocks;
|
|
||||||
|
|
||||||
import org.apache.lucene.util.Version;
|
|
||||||
import org.elasticsearch.common.lucene.Lucene;
|
|
||||||
|
|
||||||
import org.apache.lucene.util.ArrayUtil;
|
|
||||||
import org.apache.lucene.util.RamUsageEstimator;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Base implementation for {@link XPagedMutable} and {@link PagedGrowableWriter}.
|
|
||||||
* @lucene.internal
|
|
||||||
*/
|
|
||||||
abstract class XAbstractPagedMutable<T extends XAbstractPagedMutable<T>> {
|
|
||||||
|
|
||||||
static {
|
|
||||||
// LUCENE MONITOR: this should be in Lucene 4.4 copied from Revision: 1492640.
|
|
||||||
assert Lucene.VERSION == Version.LUCENE_43 : "Elasticsearch has upgraded to Lucene Version: [" + Lucene.VERSION + "] this class should be removed";
|
|
||||||
}
|
|
||||||
|
|
||||||
static final int MIN_BLOCK_SIZE = 1 << 6;
|
|
||||||
static final int MAX_BLOCK_SIZE = 1 << 30;
|
|
||||||
|
|
||||||
final long size;
|
|
||||||
final int pageShift;
|
|
||||||
final int pageMask;
|
|
||||||
final PackedInts.Mutable[] subMutables;
|
|
||||||
final int bitsPerValue;
|
|
||||||
|
|
||||||
XAbstractPagedMutable(int bitsPerValue, long size, int pageSize) {
|
|
||||||
this.bitsPerValue = bitsPerValue;
|
|
||||||
this.size = size;
|
|
||||||
pageShift = checkBlockSize(pageSize, MIN_BLOCK_SIZE, MAX_BLOCK_SIZE);
|
|
||||||
pageMask = pageSize - 1;
|
|
||||||
final int numPages = numBlocks(size, pageSize);
|
|
||||||
subMutables = new PackedInts.Mutable[numPages];
|
|
||||||
}
|
|
||||||
|
|
||||||
protected final void fillPages() {
|
|
||||||
final int numPages = numBlocks(size, pageSize());
|
|
||||||
for (int i = 0; i < numPages; ++i) {
|
|
||||||
// do not allocate for more entries than necessary on the last page
|
|
||||||
final int valueCount = i == numPages - 1 ? lastPageSize(size) : pageSize();
|
|
||||||
subMutables[i] = newMutable(valueCount, bitsPerValue);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
protected abstract PackedInts.Mutable newMutable(int valueCount, int bitsPerValue);
|
|
||||||
|
|
||||||
final int lastPageSize(long size) {
|
|
||||||
final int sz = indexInPage(size);
|
|
||||||
return sz == 0 ? pageSize() : sz;
|
|
||||||
}
|
|
||||||
|
|
||||||
final int pageSize() {
|
|
||||||
return pageMask + 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** The number of values. */
|
|
||||||
public final long size() {
|
|
||||||
return size;
|
|
||||||
}
|
|
||||||
|
|
||||||
final int pageIndex(long index) {
|
|
||||||
return (int) (index >>> pageShift);
|
|
||||||
}
|
|
||||||
|
|
||||||
final int indexInPage(long index) {
|
|
||||||
return (int) index & pageMask;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Get value at <code>index</code>. */
|
|
||||||
public final long get(long index) {
|
|
||||||
assert index >= 0 && index < size;
|
|
||||||
final int pageIndex = pageIndex(index);
|
|
||||||
final int indexInPage = indexInPage(index);
|
|
||||||
return subMutables[pageIndex].get(indexInPage);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Set value at <code>index</code>. */
|
|
||||||
public final void set(long index, long value) {
|
|
||||||
assert index >= 0 && index < size;
|
|
||||||
final int pageIndex = pageIndex(index);
|
|
||||||
final int indexInPage = indexInPage(index);
|
|
||||||
subMutables[pageIndex].set(indexInPage, value);
|
|
||||||
}
|
|
||||||
|
|
||||||
protected long baseRamBytesUsed() {
|
|
||||||
return RamUsageEstimator.NUM_BYTES_OBJECT_HEADER
|
|
||||||
+ RamUsageEstimator.NUM_BYTES_OBJECT_REF
|
|
||||||
+ RamUsageEstimator.NUM_BYTES_LONG
|
|
||||||
+ 3 * RamUsageEstimator.NUM_BYTES_INT;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Return the number of bytes used by this object. */
|
|
||||||
public long ramBytesUsed() {
|
|
||||||
long bytesUsed = RamUsageEstimator.alignObjectSize(baseRamBytesUsed());
|
|
||||||
bytesUsed += RamUsageEstimator.alignObjectSize(RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + (long) RamUsageEstimator.NUM_BYTES_OBJECT_REF * subMutables.length);
|
|
||||||
for (PackedInts.Mutable gw : subMutables) {
|
|
||||||
bytesUsed += gw.ramBytesUsed();
|
|
||||||
}
|
|
||||||
return bytesUsed;
|
|
||||||
}
|
|
||||||
|
|
||||||
protected abstract T newUnfilledCopy(long newSize);
|
|
||||||
|
|
||||||
/** Create a new copy of size <code>newSize</code> based on the content of
|
|
||||||
* this buffer. This method is much more efficient than creating a new
|
|
||||||
* instance and copying values one by one. */
|
|
||||||
public final T resize(long newSize) {
|
|
||||||
final T copy = newUnfilledCopy(newSize);
|
|
||||||
final int numCommonPages = Math.min(copy.subMutables.length, subMutables.length);
|
|
||||||
final long[] copyBuffer = new long[1024];
|
|
||||||
for (int i = 0; i < copy.subMutables.length; ++i) {
|
|
||||||
final int valueCount = i == copy.subMutables.length - 1 ? lastPageSize(newSize) : pageSize();
|
|
||||||
final int bpv = i < numCommonPages ? subMutables[i].getBitsPerValue() : this.bitsPerValue;
|
|
||||||
copy.subMutables[i] = newMutable(valueCount, bpv);
|
|
||||||
if (i < numCommonPages) {
|
|
||||||
final int copyLength = Math.min(valueCount, subMutables[i].size());
|
|
||||||
XPackedInts.copy(subMutables[i], 0, copy.subMutables[i], 0, copyLength, copyBuffer);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return copy;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Similar to {@link ArrayUtil#grow(long[], int)}. */
|
|
||||||
public final T grow(long minSize) {
|
|
||||||
assert minSize >= 0;
|
|
||||||
if (minSize <= size()) {
|
|
||||||
@SuppressWarnings("unchecked")
|
|
||||||
final T result = (T) this;
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
long extra = minSize >>> 3;
|
|
||||||
if (extra < 3) {
|
|
||||||
extra = 3;
|
|
||||||
}
|
|
||||||
final long newSize = minSize + extra;
|
|
||||||
return resize(newSize);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Similar to {@link ArrayUtil#grow(long[])}. */
|
|
||||||
public final T grow() {
|
|
||||||
return grow(size() + 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public final String toString() {
|
|
||||||
return getClass().getSimpleName() + "(size=" + size() + ",pageSize=" + pageSize() + ")";
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
|
@ -1,162 +0,0 @@
|
||||||
package org.apache.lucene.util.packed;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
|
||||||
* this work for additional information regarding copyright ownership.
|
|
||||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
|
||||||
* (the "License"); you may not use this file except in compliance with
|
|
||||||
* the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
import org.apache.lucene.util.Version;
|
|
||||||
import org.elasticsearch.common.lucene.Lucene;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
|
|
||||||
import org.apache.lucene.store.DataOutput;
|
|
||||||
import org.apache.lucene.util.RamUsageEstimator;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Implements {@link XPackedInts.Mutable}, but grows the
|
|
||||||
* bit count of the underlying packed ints on-demand.
|
|
||||||
* <p>Beware that this class will accept to set negative values but in order
|
|
||||||
* to do this, it will grow the number of bits per value to 64.
|
|
||||||
*
|
|
||||||
* <p>@lucene.internal</p>
|
|
||||||
*/
|
|
||||||
public class XGrowableWriter implements PackedInts.Mutable {
|
|
||||||
|
|
||||||
static {
|
|
||||||
// LUCENE MONITOR: this should be in Lucene 4.4 copied from Revision: 1492640.
|
|
||||||
assert Lucene.VERSION == Version.LUCENE_43 : "Elasticsearch has upgraded to Lucene Version: [" + Lucene.VERSION + "] this class should be removed";
|
|
||||||
}
|
|
||||||
|
|
||||||
private long currentMask;
|
|
||||||
private PackedInts.Mutable current;
|
|
||||||
private final float acceptableOverheadRatio;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @param startBitsPerValue the initial number of bits per value, may grow depending on the data
|
|
||||||
* @param valueCount the number of values
|
|
||||||
* @param acceptableOverheadRatio an acceptable overhead ratio
|
|
||||||
*/
|
|
||||||
public XGrowableWriter(int startBitsPerValue, int valueCount, float acceptableOverheadRatio) {
|
|
||||||
this.acceptableOverheadRatio = acceptableOverheadRatio;
|
|
||||||
current = PackedInts.getMutable(valueCount, startBitsPerValue, this.acceptableOverheadRatio);
|
|
||||||
currentMask = mask(current.getBitsPerValue());
|
|
||||||
}
|
|
||||||
|
|
||||||
private static long mask(int bitsPerValue) {
|
|
||||||
return bitsPerValue == 64 ? ~0L : PackedInts.maxValue(bitsPerValue);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public long get(int index) {
|
|
||||||
return current.get(index);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int size() {
|
|
||||||
return current.size();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int getBitsPerValue() {
|
|
||||||
return current.getBitsPerValue();
|
|
||||||
}
|
|
||||||
|
|
||||||
public PackedInts.Mutable getMutable() {
|
|
||||||
return current;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public Object getArray() {
|
|
||||||
return current.getArray();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean hasArray() {
|
|
||||||
return current.hasArray();
|
|
||||||
}
|
|
||||||
|
|
||||||
private void ensureCapacity(long value) {
|
|
||||||
if ((value & currentMask) == value) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
final int bitsRequired = value < 0 ? 64 : PackedInts.bitsRequired(value);
|
|
||||||
assert bitsRequired > current.getBitsPerValue();
|
|
||||||
final int valueCount = size();
|
|
||||||
PackedInts.Mutable next = PackedInts.getMutable(valueCount, bitsRequired, acceptableOverheadRatio);
|
|
||||||
PackedInts.copy(current, 0, next, 0, valueCount, PackedInts.DEFAULT_BUFFER_SIZE);
|
|
||||||
current = next;
|
|
||||||
currentMask = mask(current.getBitsPerValue());
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void set(int index, long value) {
|
|
||||||
ensureCapacity(value);
|
|
||||||
current.set(index, value);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void clear() {
|
|
||||||
current.clear();
|
|
||||||
}
|
|
||||||
|
|
||||||
public XGrowableWriter resize(int newSize) {
|
|
||||||
XGrowableWriter next = new XGrowableWriter(getBitsPerValue(), newSize, acceptableOverheadRatio);
|
|
||||||
final int limit = Math.min(size(), newSize);
|
|
||||||
PackedInts.copy(current, 0, next, 0, limit, PackedInts.DEFAULT_BUFFER_SIZE);
|
|
||||||
return next;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int get(int index, long[] arr, int off, int len) {
|
|
||||||
return current.get(index, arr, off, len);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int set(int index, long[] arr, int off, int len) {
|
|
||||||
long max = 0;
|
|
||||||
for (int i = off, end = off + len; i < end; ++i) {
|
|
||||||
// bitwise or is nice because either all values are positive and the
|
|
||||||
// or-ed result will require as many bits per value as the max of the
|
|
||||||
// values, or one of them is negative and the result will be negative,
|
|
||||||
// forcing GrowableWriter to use 64 bits per value
|
|
||||||
max |= arr[i];
|
|
||||||
}
|
|
||||||
ensureCapacity(max);
|
|
||||||
return current.set(index, arr, off, len);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void fill(int fromIndex, int toIndex, long val) {
|
|
||||||
ensureCapacity(val);
|
|
||||||
current.fill(fromIndex, toIndex, val);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public long ramBytesUsed() {
|
|
||||||
return RamUsageEstimator.alignObjectSize(
|
|
||||||
RamUsageEstimator.NUM_BYTES_OBJECT_HEADER
|
|
||||||
+ RamUsageEstimator.NUM_BYTES_OBJECT_REF
|
|
||||||
+ RamUsageEstimator.NUM_BYTES_LONG
|
|
||||||
+ RamUsageEstimator.NUM_BYTES_FLOAT)
|
|
||||||
+ current.ramBytesUsed();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void save(DataOutput out) throws IOException {
|
|
||||||
current.save(out);
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
|
@ -1,88 +0,0 @@
|
||||||
package org.apache.lucene.util.packed;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
|
||||||
* this work for additional information regarding copyright ownership.
|
|
||||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
|
||||||
* (the "License"); you may not use this file except in compliance with
|
|
||||||
* the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
import org.apache.lucene.util.Version;
|
|
||||||
import org.apache.lucene.util.packed.PackedInts.Mutable;
|
|
||||||
import org.apache.lucene.util.packed.PackedInts.Reader;
|
|
||||||
import org.elasticsearch.common.lucene.Lucene;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Simplistic compression for array of unsigned long values.
|
|
||||||
* Each value is >= 0 and <= a specified maximum value. The
|
|
||||||
* values are stored as packed ints, with each value
|
|
||||||
* consuming a fixed number of bits.
|
|
||||||
*
|
|
||||||
* @lucene.internal
|
|
||||||
*/
|
|
||||||
public class XPackedInts {
|
|
||||||
|
|
||||||
static {
|
|
||||||
// LUCENE MONITOR: this should be in Lucene 4.4 copied from Revision: 1492640.
|
|
||||||
assert Lucene.VERSION == Version.LUCENE_43 : "Elasticsearch has upgraded to Lucene Version: [" + Lucene.VERSION + "] this class should be removed";
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Same as {@link #copy(Reader, int, Mutable, int, int, int)} but using a pre-allocated buffer. */
|
|
||||||
static void copy(Reader src, int srcPos, Mutable dest, int destPos, int len, long[] buf) {
|
|
||||||
assert buf.length > 0;
|
|
||||||
int remaining = 0;
|
|
||||||
while (len > 0) {
|
|
||||||
final int read = src.get(srcPos, buf, remaining, Math.min(len, buf.length - remaining));
|
|
||||||
assert read > 0;
|
|
||||||
srcPos += read;
|
|
||||||
len -= read;
|
|
||||||
remaining += read;
|
|
||||||
final int written = dest.set(destPos, buf, 0, remaining);
|
|
||||||
assert written > 0;
|
|
||||||
destPos += written;
|
|
||||||
if (written < remaining) {
|
|
||||||
System.arraycopy(buf, written, buf, 0, remaining - written);
|
|
||||||
}
|
|
||||||
remaining -= written;
|
|
||||||
}
|
|
||||||
while (remaining > 0) {
|
|
||||||
final int written = dest.set(destPos, buf, 0, remaining);
|
|
||||||
destPos += written;
|
|
||||||
remaining -= written;
|
|
||||||
System.arraycopy(buf, written, buf, 0, remaining);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Check that the block size is a power of 2, in the right bounds, and return
|
|
||||||
* its log in base 2. */
|
|
||||||
static int checkBlockSize(int blockSize, int minBlockSize, int maxBlockSize) {
|
|
||||||
if (blockSize < minBlockSize || blockSize > maxBlockSize) {
|
|
||||||
throw new IllegalArgumentException("blockSize must be >= " + minBlockSize + " and <= " + maxBlockSize + ", got " + blockSize);
|
|
||||||
}
|
|
||||||
if ((blockSize & (blockSize - 1)) != 0) {
|
|
||||||
throw new IllegalArgumentException("blockSize must be a power of two, got " + blockSize);
|
|
||||||
}
|
|
||||||
return Integer.numberOfTrailingZeros(blockSize);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Return the number of blocks required to store <code>size</code> values on
|
|
||||||
* <code>blockSize</code>. */
|
|
||||||
static int numBlocks(long size, int blockSize) {
|
|
||||||
final int numBlocks = (int) (size / blockSize) + (size % blockSize == 0 ? 0 : 1);
|
|
||||||
if ((long) numBlocks * blockSize < size) {
|
|
||||||
throw new IllegalArgumentException("size is too large for this block size");
|
|
||||||
}
|
|
||||||
return numBlocks;
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
|
@ -1,79 +0,0 @@
|
||||||
package org.apache.lucene.util.packed;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
|
||||||
* this work for additional information regarding copyright ownership.
|
|
||||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
|
||||||
* (the "License"); you may not use this file except in compliance with
|
|
||||||
* the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
import org.apache.lucene.util.Version;
|
|
||||||
import org.elasticsearch.common.lucene.Lucene;
|
|
||||||
|
|
||||||
import org.apache.lucene.util.RamUsageEstimator;
|
|
||||||
import org.apache.lucene.util.packed.PackedInts.Mutable;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* A {@link XPagedGrowableWriter}. This class slices data into fixed-size blocks
|
|
||||||
* which have independent numbers of bits per value and grow on-demand.
|
|
||||||
* <p>You should use this class instead of {@link AppendingLongBuffer} only when
|
|
||||||
* you need random write-access. Otherwise this class will likely be slower and
|
|
||||||
* less memory-efficient.
|
|
||||||
* @lucene.internal
|
|
||||||
*/
|
|
||||||
public final class XPagedGrowableWriter extends XAbstractPagedMutable<XPagedGrowableWriter> {
|
|
||||||
|
|
||||||
static {
|
|
||||||
// LUCENE MONITOR: this should be in Lucene 4.4 copied from Revision: 1492640.
|
|
||||||
assert Lucene.VERSION == Version.LUCENE_43 : "Elasticsearch has upgraded to Lucene Version: [" + Lucene.VERSION + "] this class should be removed";
|
|
||||||
}
|
|
||||||
|
|
||||||
final float acceptableOverheadRatio;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Create a new {@link XPagedGrowableWriter} instance.
|
|
||||||
*
|
|
||||||
* @param size the number of values to store.
|
|
||||||
* @param pageSize the number of values per page
|
|
||||||
* @param startBitsPerValue the initial number of bits per value
|
|
||||||
* @param acceptableOverheadRatio an acceptable overhead ratio
|
|
||||||
*/
|
|
||||||
public XPagedGrowableWriter(long size, int pageSize,
|
|
||||||
int startBitsPerValue, float acceptableOverheadRatio) {
|
|
||||||
this(size, pageSize, startBitsPerValue, acceptableOverheadRatio, true);
|
|
||||||
}
|
|
||||||
|
|
||||||
XPagedGrowableWriter(long size, int pageSize,int startBitsPerValue, float acceptableOverheadRatio, boolean fillPages) {
|
|
||||||
super(startBitsPerValue, size, pageSize);
|
|
||||||
this.acceptableOverheadRatio = acceptableOverheadRatio;
|
|
||||||
if (fillPages) {
|
|
||||||
fillPages();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
protected Mutable newMutable(int valueCount, int bitsPerValue) {
|
|
||||||
return new XGrowableWriter(bitsPerValue, valueCount, acceptableOverheadRatio);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
protected XPagedGrowableWriter newUnfilledCopy(long newSize) {
|
|
||||||
return new XPagedGrowableWriter(newSize, pageSize(), bitsPerValue, acceptableOverheadRatio, false);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
protected long baseRamBytesUsed() {
|
|
||||||
return super.baseRamBytesUsed() + RamUsageEstimator.NUM_BYTES_FLOAT;
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
|
@ -128,7 +128,7 @@ public class Version implements Serializable {
|
||||||
public static final Version V_0_90_3 = new Version(V_0_90_3_ID, false, org.apache.lucene.util.Version.LUCENE_43);
|
public static final Version V_0_90_3 = new Version(V_0_90_3_ID, false, org.apache.lucene.util.Version.LUCENE_43);
|
||||||
|
|
||||||
public static final int V_1_0_0_Beta1_ID = /*00*/1000001;
|
public static final int V_1_0_0_Beta1_ID = /*00*/1000001;
|
||||||
public static final Version V_1_0_0_Beta1 = new Version(V_1_0_0_Beta1_ID, true, org.apache.lucene.util.Version.LUCENE_43);
|
public static final Version V_1_0_0_Beta1 = new Version(V_1_0_0_Beta1_ID, true, org.apache.lucene.util.Version.LUCENE_44);
|
||||||
|
|
||||||
public static final Version CURRENT = V_1_0_0_Beta1;
|
public static final Version CURRENT = V_1_0_0_Beta1;
|
||||||
|
|
||||||
|
|
|
@ -473,7 +473,7 @@ public class MetaDataCreateIndexService extends AbstractComponent {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
CollectionUtil.quickSort(templates, new Comparator<IndexTemplateMetaData>() {
|
CollectionUtil.timSort(templates, new Comparator<IndexTemplateMetaData>() {
|
||||||
@Override
|
@Override
|
||||||
public int compare(IndexTemplateMetaData o1, IndexTemplateMetaData o2) {
|
public int compare(IndexTemplateMetaData o1, IndexTemplateMetaData o2) {
|
||||||
return o2.order() - o1.order();
|
return o2.order() - o1.order();
|
||||||
|
|
|
@ -19,8 +19,11 @@
|
||||||
|
|
||||||
package org.elasticsearch.cluster.routing.allocation.allocator;
|
package org.elasticsearch.cluster.routing.allocation.allocator;
|
||||||
|
|
||||||
|
import org.apache.lucene.util.IntroSorter;
|
||||||
|
|
||||||
|
import org.apache.lucene.util.IntroSorter;
|
||||||
|
|
||||||
import com.google.common.base.Predicate;
|
import com.google.common.base.Predicate;
|
||||||
import org.apache.lucene.util.SorterTemplate;
|
|
||||||
import org.elasticsearch.ElasticSearchIllegalArgumentException;
|
import org.elasticsearch.ElasticSearchIllegalArgumentException;
|
||||||
import org.elasticsearch.cluster.metadata.MetaData;
|
import org.elasticsearch.cluster.metadata.MetaData;
|
||||||
import org.elasticsearch.cluster.routing.MutableShardRouting;
|
import org.elasticsearch.cluster.routing.MutableShardRouting;
|
||||||
|
@ -404,7 +407,7 @@ public class BalancedShardsAllocator extends AbstractComponent implements Shards
|
||||||
*/
|
*/
|
||||||
weights[lowIdx] = sorter.weight(Operation.BALANCE, modelNodes[lowIdx]);
|
weights[lowIdx] = sorter.weight(Operation.BALANCE, modelNodes[lowIdx]);
|
||||||
weights[highIdx] = sorter.weight(Operation.BALANCE, modelNodes[highIdx]);
|
weights[highIdx] = sorter.weight(Operation.BALANCE, modelNodes[highIdx]);
|
||||||
sorter.quickSort(0, weights.length - 1);
|
sorter.sort(0, weights.length);
|
||||||
lowIdx = 0;
|
lowIdx = 0;
|
||||||
highIdx = weights.length - 1;
|
highIdx = weights.length - 1;
|
||||||
changed = true;
|
changed = true;
|
||||||
|
@ -451,7 +454,7 @@ public class BalancedShardsAllocator extends AbstractComponent implements Shards
|
||||||
sorter.reset(operation, indices[i]);
|
sorter.reset(operation, indices[i]);
|
||||||
deltas[i] = sorter.delta();
|
deltas[i] = sorter.delta();
|
||||||
}
|
}
|
||||||
new SorterTemplate() {
|
new IntroSorter() {
|
||||||
float pivotWeight;
|
float pivotWeight;
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -478,7 +481,7 @@ public class BalancedShardsAllocator extends AbstractComponent implements Shards
|
||||||
protected int comparePivot(int j) {
|
protected int comparePivot(int j) {
|
||||||
return Float.compare(deltas[j], pivotWeight);
|
return Float.compare(deltas[j], pivotWeight);
|
||||||
}
|
}
|
||||||
}.quickSort(0, deltas.length - 1);
|
}.sort(0, deltas.length);
|
||||||
|
|
||||||
return indices;
|
return indices;
|
||||||
}
|
}
|
||||||
|
@ -956,7 +959,7 @@ public class BalancedShardsAllocator extends AbstractComponent implements Shards
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static final class NodeSorter extends SorterTemplate {
|
static final class NodeSorter extends IntroSorter {
|
||||||
|
|
||||||
final ModelNode[] modelNodes;
|
final ModelNode[] modelNodes;
|
||||||
/* the nodes weights with respect to the current weight function / index */
|
/* the nodes weights with respect to the current weight function / index */
|
||||||
|
@ -982,7 +985,7 @@ public class BalancedShardsAllocator extends AbstractComponent implements Shards
|
||||||
for (int i = 0; i < weights.length; i++) {
|
for (int i = 0; i < weights.length; i++) {
|
||||||
weights[i] = weight(operation, modelNodes[i]);
|
weights[i] = weight(operation, modelNodes[i]);
|
||||||
}
|
}
|
||||||
quickSort(0, modelNodes.length - 1);
|
sort(0, modelNodes.length);
|
||||||
}
|
}
|
||||||
|
|
||||||
public float weight(Operation operation, ModelNode node) {
|
public float weight(Operation operation, ModelNode node) {
|
||||||
|
|
|
@ -35,14 +35,13 @@ import org.elasticsearch.index.analysis.NamedAnalyzer;
|
||||||
import org.elasticsearch.index.fielddata.IndexFieldData;
|
import org.elasticsearch.index.fielddata.IndexFieldData;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.lang.reflect.Field;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
public class Lucene {
|
public class Lucene {
|
||||||
|
|
||||||
public static final Version VERSION = Version.LUCENE_43;
|
public static final Version VERSION = Version.LUCENE_44;
|
||||||
public static final Version ANALYZER_VERSION = VERSION;
|
public static final Version ANALYZER_VERSION = VERSION;
|
||||||
public static final Version QUERYPARSER_VERSION = VERSION;
|
public static final Version QUERYPARSER_VERSION = VERSION;
|
||||||
|
|
||||||
|
@ -57,6 +56,9 @@ public class Lucene {
|
||||||
if (version == null) {
|
if (version == null) {
|
||||||
return defaultVersion;
|
return defaultVersion;
|
||||||
}
|
}
|
||||||
|
if ("4.4".equals(version)) {
|
||||||
|
return VERSION.LUCENE_44;
|
||||||
|
}
|
||||||
if ("4.3".equals(version)) {
|
if ("4.3".equals(version)) {
|
||||||
return Version.LUCENE_43;
|
return Version.LUCENE_43;
|
||||||
}
|
}
|
||||||
|
|
|
@ -19,6 +19,8 @@
|
||||||
|
|
||||||
package org.elasticsearch.common.lucene.all;
|
package org.elasticsearch.common.lucene.all;
|
||||||
|
|
||||||
|
import org.apache.lucene.search.similarities.Similarity.SimScorer;
|
||||||
|
|
||||||
import org.apache.lucene.index.AtomicReader;
|
import org.apache.lucene.index.AtomicReader;
|
||||||
import org.apache.lucene.index.AtomicReaderContext;
|
import org.apache.lucene.index.AtomicReaderContext;
|
||||||
import org.apache.lucene.index.DocsAndPositionsEnum;
|
import org.apache.lucene.index.DocsAndPositionsEnum;
|
||||||
|
@ -27,7 +29,6 @@ import org.apache.lucene.index.IndexReaderContext;
|
||||||
import org.apache.lucene.index.Term;
|
import org.apache.lucene.index.Term;
|
||||||
import org.apache.lucene.search.*;
|
import org.apache.lucene.search.*;
|
||||||
import org.apache.lucene.search.similarities.Similarity;
|
import org.apache.lucene.search.similarities.Similarity;
|
||||||
import org.apache.lucene.search.similarities.Similarity.SloppySimScorer;
|
|
||||||
import org.apache.lucene.search.spans.SpanScorer;
|
import org.apache.lucene.search.spans.SpanScorer;
|
||||||
import org.apache.lucene.search.spans.SpanTermQuery;
|
import org.apache.lucene.search.spans.SpanTermQuery;
|
||||||
import org.apache.lucene.search.spans.SpanWeight;
|
import org.apache.lucene.search.spans.SpanWeight;
|
||||||
|
@ -74,7 +75,7 @@ public class AllTermQuery extends SpanTermQuery {
|
||||||
if (this.stats == null) {
|
if (this.stats == null) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
SloppySimScorer sloppySimScorer = similarity.sloppySimScorer(stats, context);
|
SimScorer sloppySimScorer = similarity.simScorer(stats, context);
|
||||||
return new AllTermSpanScorer((TermSpans) query.getSpans(context, acceptDocs, termContexts), this, sloppySimScorer);
|
return new AllTermSpanScorer((TermSpans) query.getSpans(context, acceptDocs, termContexts), this, sloppySimScorer);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -83,7 +84,7 @@ public class AllTermQuery extends SpanTermQuery {
|
||||||
protected float payloadScore;
|
protected float payloadScore;
|
||||||
protected int payloadsSeen;
|
protected int payloadsSeen;
|
||||||
|
|
||||||
public AllTermSpanScorer(TermSpans spans, Weight weight, Similarity.SloppySimScorer docScorer) throws IOException {
|
public AllTermSpanScorer(TermSpans spans, Weight weight, Similarity.SimScorer docScorer) throws IOException {
|
||||||
super(spans, weight, docScorer);
|
super(spans, weight, docScorer);
|
||||||
positions = spans.getPostings();
|
positions = spans.getPostings();
|
||||||
}
|
}
|
||||||
|
@ -158,7 +159,7 @@ public class AllTermQuery extends SpanTermQuery {
|
||||||
int newDoc = scorer.advance(doc);
|
int newDoc = scorer.advance(doc);
|
||||||
if (newDoc == doc) {
|
if (newDoc == doc) {
|
||||||
float freq = scorer.freq();
|
float freq = scorer.freq();
|
||||||
SloppySimScorer docScorer = similarity.sloppySimScorer(stats, context);
|
SimScorer docScorer = similarity.simScorer(stats, context);
|
||||||
ComplexExplanation inner = new ComplexExplanation();
|
ComplexExplanation inner = new ComplexExplanation();
|
||||||
inner.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:");
|
inner.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:");
|
||||||
Explanation scoreExplanation = docScorer.explain(doc, new Explanation(freq, "phraseFreq=" + freq));
|
Explanation scoreExplanation = docScorer.explain(doc, new Explanation(freq, "phraseFreq=" + freq));
|
||||||
|
|
|
@ -45,7 +45,7 @@ public class DocIdSets {
|
||||||
* Is it an empty {@link DocIdSet}?
|
* Is it an empty {@link DocIdSet}?
|
||||||
*/
|
*/
|
||||||
public static boolean isEmpty(@Nullable DocIdSet set) {
|
public static boolean isEmpty(@Nullable DocIdSet set) {
|
||||||
return set == null || set == DocIdSet.EMPTY_DOCIDSET;
|
return set == null || set == EMPTY_DOCIDSET;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -70,19 +70,19 @@ public class DocIdSets {
|
||||||
* <p/>
|
* <p/>
|
||||||
* Note, we don't use {@link org.apache.lucene.search.DocIdSet#isCacheable()} because execution
|
* Note, we don't use {@link org.apache.lucene.search.DocIdSet#isCacheable()} because execution
|
||||||
* might be expensive even if its cacheable (i.e. not going back to the reader to execute). We effectively
|
* might be expensive even if its cacheable (i.e. not going back to the reader to execute). We effectively
|
||||||
* always either return {@link DocIdSet#EMPTY_DOCIDSET} or {@link FixedBitSet}.
|
* always either return an empty {@link DocIdSet} or {@link FixedBitSet} but never <code>null</code>.
|
||||||
*/
|
*/
|
||||||
public static DocIdSet toCacheable(AtomicReader reader, @Nullable DocIdSet set) throws IOException {
|
public static DocIdSet toCacheable(AtomicReader reader, @Nullable DocIdSet set) throws IOException {
|
||||||
if (set == null || set == DocIdSet.EMPTY_DOCIDSET) {
|
if (set == null || set == EMPTY_DOCIDSET) {
|
||||||
return DocIdSet.EMPTY_DOCIDSET;
|
return EMPTY_DOCIDSET;
|
||||||
}
|
}
|
||||||
DocIdSetIterator it = set.iterator();
|
DocIdSetIterator it = set.iterator();
|
||||||
if (it == null) {
|
if (it == null) {
|
||||||
return DocIdSet.EMPTY_DOCIDSET;
|
return EMPTY_DOCIDSET;
|
||||||
}
|
}
|
||||||
int doc = it.nextDoc();
|
int doc = it.nextDoc();
|
||||||
if (doc == DocIdSetIterator.NO_MORE_DOCS) {
|
if (doc == DocIdSetIterator.NO_MORE_DOCS) {
|
||||||
return DocIdSet.EMPTY_DOCIDSET;
|
return EMPTY_DOCIDSET;
|
||||||
}
|
}
|
||||||
if (set instanceof FixedBitSet) {
|
if (set instanceof FixedBitSet) {
|
||||||
return set;
|
return set;
|
||||||
|
@ -95,6 +95,26 @@ public class DocIdSets {
|
||||||
return fixedBitSet;
|
return fixedBitSet;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** An empty {@code DocIdSet} instance */
|
||||||
|
protected static final DocIdSet EMPTY_DOCIDSET = new DocIdSet() {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public DocIdSetIterator iterator() {
|
||||||
|
return DocIdSetIterator.empty();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean isCacheable() {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// we explicitly provide no random access, as this filter is 100% sparse and iterator exits faster
|
||||||
|
@Override
|
||||||
|
public Bits bits() {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gets a set to bits.
|
* Gets a set to bits.
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -110,7 +110,7 @@ public class ElectMasterService extends AbstractComponent {
|
||||||
it.remove();
|
it.remove();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
CollectionUtil.quickSort(possibleNodes, nodeComparator);
|
CollectionUtil.introSort(possibleNodes, nodeComparator);
|
||||||
return possibleNodes;
|
return possibleNodes;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -19,6 +19,8 @@
|
||||||
|
|
||||||
package org.elasticsearch.index.analysis;
|
package org.elasticsearch.index.analysis;
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.analysis.ngram.*;
|
import org.apache.lucene.analysis.ngram.*;
|
||||||
import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter.Side;
|
import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter.Side;
|
||||||
|
@ -47,24 +49,24 @@ public class EdgeNGramTokenFilterFactory extends AbstractTokenFilterFactory {
|
||||||
super(index, indexSettings, name, settings);
|
super(index, indexSettings, name, settings);
|
||||||
this.minGram = settings.getAsInt("min_gram", NGramTokenFilter.DEFAULT_MIN_NGRAM_SIZE);
|
this.minGram = settings.getAsInt("min_gram", NGramTokenFilter.DEFAULT_MIN_NGRAM_SIZE);
|
||||||
this.maxGram = settings.getAsInt("max_gram", NGramTokenFilter.DEFAULT_MAX_NGRAM_SIZE);
|
this.maxGram = settings.getAsInt("max_gram", NGramTokenFilter.DEFAULT_MAX_NGRAM_SIZE);
|
||||||
this.side = EdgeNGramTokenFilter.Side.getSide(settings.get("side", EdgeNGramTokenizer.DEFAULT_SIDE.getLabel()));
|
this.side = EdgeNGramTokenFilter.Side.getSide(settings.get("side", Lucene43EdgeNGramTokenizer.DEFAULT_SIDE.getLabel()));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public TokenStream create(TokenStream tokenStream) {
|
public TokenStream create(TokenStream tokenStream) {
|
||||||
|
final Version version = this.version == Version.LUCENE_43 ? Version.LUCENE_44 : this.version; // we supported it since 4.3
|
||||||
if (version.onOrAfter(Version.LUCENE_43)) {
|
if (version.onOrAfter(Version.LUCENE_43)) {
|
||||||
TokenStream result = tokenStream;
|
TokenStream result = tokenStream;
|
||||||
// side=BACK is not supported anymore but applying ReverseStringFilter up-front and after the token filter has the same effect
|
// side=BACK is not supported anymore but applying ReverseStringFilter up-front and after the token filter has the same effect
|
||||||
if (side == Side.BACK) {
|
if (side == Side.BACK) {
|
||||||
result = new ReverseStringFilter(version, result);
|
result = new ReverseStringFilter(version, result);
|
||||||
}
|
}
|
||||||
result = new XEdgeNGramTokenFilter(version, result, minGram, maxGram);
|
result = new EdgeNGramTokenFilter(version, result, minGram, maxGram);
|
||||||
if (side == Side.BACK) {
|
if (side == Side.BACK) {
|
||||||
result = new ReverseStringFilter(version, result);
|
result = new ReverseStringFilter(version, result);
|
||||||
}
|
}
|
||||||
return result;
|
return result;
|
||||||
} else {
|
}
|
||||||
return new EdgeNGramTokenFilter(tokenStream, side, minGram, maxGram);
|
return new EdgeNGramTokenFilter(version, tokenStream, side, minGram, maxGram);
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
|
@ -19,12 +19,13 @@
|
||||||
|
|
||||||
package org.elasticsearch.index.analysis;
|
package org.elasticsearch.index.analysis;
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.ngram.Lucene43EdgeNGramTokenizer;
|
||||||
|
|
||||||
import org.elasticsearch.ElasticSearchIllegalArgumentException;
|
import org.elasticsearch.ElasticSearchIllegalArgumentException;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Tokenizer;
|
import org.apache.lucene.analysis.Tokenizer;
|
||||||
import org.apache.lucene.analysis.ngram.EdgeNGramTokenizer;
|
import org.apache.lucene.analysis.ngram.EdgeNGramTokenizer;
|
||||||
import org.apache.lucene.analysis.ngram.NGramTokenizer;
|
import org.apache.lucene.analysis.ngram.NGramTokenizer;
|
||||||
import org.apache.lucene.analysis.ngram.XEdgeNGramTokenizer;
|
|
||||||
import org.apache.lucene.util.Version;
|
import org.apache.lucene.util.Version;
|
||||||
import org.elasticsearch.common.inject.Inject;
|
import org.elasticsearch.common.inject.Inject;
|
||||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||||
|
@ -45,7 +46,7 @@ public class EdgeNGramTokenizerFactory extends AbstractTokenizerFactory {
|
||||||
|
|
||||||
private final int maxGram;
|
private final int maxGram;
|
||||||
|
|
||||||
private final EdgeNGramTokenizer.Side side;
|
private final Lucene43EdgeNGramTokenizer.Side side;
|
||||||
|
|
||||||
private final CharMatcher matcher;
|
private final CharMatcher matcher;
|
||||||
|
|
||||||
|
@ -54,22 +55,23 @@ public class EdgeNGramTokenizerFactory extends AbstractTokenizerFactory {
|
||||||
super(index, indexSettings, name, settings);
|
super(index, indexSettings, name, settings);
|
||||||
this.minGram = settings.getAsInt("min_gram", NGramTokenizer.DEFAULT_MIN_NGRAM_SIZE);
|
this.minGram = settings.getAsInt("min_gram", NGramTokenizer.DEFAULT_MIN_NGRAM_SIZE);
|
||||||
this.maxGram = settings.getAsInt("max_gram", NGramTokenizer.DEFAULT_MAX_NGRAM_SIZE);
|
this.maxGram = settings.getAsInt("max_gram", NGramTokenizer.DEFAULT_MAX_NGRAM_SIZE);
|
||||||
this.side = EdgeNGramTokenizer.Side.getSide(settings.get("side", EdgeNGramTokenizer.DEFAULT_SIDE.getLabel()));
|
this.side = Lucene43EdgeNGramTokenizer.Side.getSide(settings.get("side", Lucene43EdgeNGramTokenizer.DEFAULT_SIDE.getLabel()));
|
||||||
this.matcher = parseTokenChars(settings.getAsArray("token_chars"));
|
this.matcher = parseTokenChars(settings.getAsArray("token_chars"));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Tokenizer create(Reader reader) {
|
public Tokenizer create(Reader reader) {
|
||||||
if (version.onOrAfter(Version.LUCENE_43)) {
|
final Version version = this.version == Version.LUCENE_43 ? Version.LUCENE_44 : this.version; // we supported it since 4.3
|
||||||
if (side == EdgeNGramTokenizer.Side.BACK) {
|
if (version.onOrAfter(Version.LUCENE_44)) {
|
||||||
|
if (side == Lucene43EdgeNGramTokenizer.Side.BACK) {
|
||||||
throw new ElasticSearchIllegalArgumentException("side=BACK is not supported anymore. Please fix your analysis chain or use"
|
throw new ElasticSearchIllegalArgumentException("side=BACK is not supported anymore. Please fix your analysis chain or use"
|
||||||
+ " an older compatibility version (<=4.2) but beware that it might cause highlighting bugs.");
|
+ " an older compatibility version (<=4.2) but beware that it might cause highlighting bugs.");
|
||||||
}
|
}
|
||||||
// LUCENE MONITOR: this token filter is a copy from lucene trunk and should go away once we upgrade to lucene 4.4
|
// LUCENE MONITOR: this token filter is a copy from lucene trunk and should go away once we upgrade to lucene 4.4
|
||||||
if (matcher == null) {
|
if (matcher == null) {
|
||||||
return new XEdgeNGramTokenizer(version, reader, minGram, maxGram);
|
return new EdgeNGramTokenizer(version, reader, minGram, maxGram);
|
||||||
} else {
|
} else {
|
||||||
return new XEdgeNGramTokenizer(version, reader, minGram, maxGram) {
|
return new EdgeNGramTokenizer(version, reader, minGram, maxGram) {
|
||||||
@Override
|
@Override
|
||||||
protected boolean isTokenChar(int chr) {
|
protected boolean isTokenChar(int chr) {
|
||||||
return matcher.isTokenChar(chr);
|
return matcher.isTokenChar(chr);
|
||||||
|
@ -77,7 +79,7 @@ public class EdgeNGramTokenizerFactory extends AbstractTokenizerFactory {
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
return new EdgeNGramTokenizer(reader, side, minGram, maxGram);
|
return new Lucene43EdgeNGramTokenizer(version, reader, side, minGram, maxGram);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
|
@ -17,6 +17,8 @@ package org.elasticsearch.index.analysis;
|
||||||
* specific language governing permissions and limitations
|
* specific language governing permissions and limitations
|
||||||
* under the License.
|
* under the License.
|
||||||
*/
|
*/
|
||||||
|
import org.apache.lucene.util.Version;
|
||||||
|
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
|
@ -60,8 +62,8 @@ import org.elasticsearch.indices.analysis.IndicesAnalysisService;
|
||||||
*/
|
*/
|
||||||
@AnalysisSettingsRequired
|
@AnalysisSettingsRequired
|
||||||
public class KeepWordFilterFactory extends AbstractTokenFilterFactory {
|
public class KeepWordFilterFactory extends AbstractTokenFilterFactory {
|
||||||
private Boolean enablePositionIncrements;
|
private final CharArraySet keepWords;
|
||||||
private CharArraySet keepWords;
|
private final boolean enablePositionIncrements;
|
||||||
private static final String KEEP_WORDS_KEY = "keep_words";
|
private static final String KEEP_WORDS_KEY = "keep_words";
|
||||||
private static final String KEEP_WORDS_PATH_KEY = KEEP_WORDS_KEY + "_path";
|
private static final String KEEP_WORDS_PATH_KEY = KEEP_WORDS_KEY + "_path";
|
||||||
private static final String KEEP_WORDS_CASE_KEY = KEEP_WORDS_KEY + "_case"; // for javadoc
|
private static final String KEEP_WORDS_CASE_KEY = KEEP_WORDS_KEY + "_case"; // for javadoc
|
||||||
|
@ -80,14 +82,22 @@ public class KeepWordFilterFactory extends AbstractTokenFilterFactory {
|
||||||
throw new ElasticSearchIllegalArgumentException("keep requires either `" + KEEP_WORDS_KEY + "` or `"
|
throw new ElasticSearchIllegalArgumentException("keep requires either `" + KEEP_WORDS_KEY + "` or `"
|
||||||
+ KEEP_WORDS_PATH_KEY + "` to be configured");
|
+ KEEP_WORDS_PATH_KEY + "` to be configured");
|
||||||
}
|
}
|
||||||
this.enablePositionIncrements = settings.getAsBoolean(ENABLE_POS_INC_KEY, true);
|
if (version.onOrAfter(Version.LUCENE_44) && settings.get(ENABLE_POS_INC_KEY) != null) {
|
||||||
|
throw new ElasticSearchIllegalArgumentException(ENABLE_POS_INC_KEY + " is not supported anymore. Please fix your analysis chain or use"
|
||||||
|
+ " an older compatibility version (<=4.3) but beware that it might cause highlighting bugs.");
|
||||||
|
}
|
||||||
|
enablePositionIncrements = version.onOrAfter(Version.LUCENE_44) ? true : settings.getAsBoolean(ENABLE_POS_INC_KEY, true);
|
||||||
|
|
||||||
this.keepWords = Analysis.getWordSet(env, settings, KEEP_WORDS_KEY, version);
|
this.keepWords = Analysis.getWordSet(env, settings, KEEP_WORDS_KEY, version);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public TokenStream create(TokenStream tokenStream) {
|
public TokenStream create(TokenStream tokenStream) {
|
||||||
return new KeepWordFilter(enablePositionIncrements, tokenStream, keepWords);
|
if (version.onOrAfter(Version.LUCENE_44)) {
|
||||||
|
return new KeepWordFilter(version, tokenStream, keepWords);
|
||||||
|
}
|
||||||
|
return new KeepWordFilter(version, enablePositionIncrements, tokenStream, keepWords);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -19,6 +19,10 @@
|
||||||
|
|
||||||
package org.elasticsearch.index.analysis;
|
package org.elasticsearch.index.analysis;
|
||||||
|
|
||||||
|
import org.elasticsearch.ElasticSearchIllegalArgumentException;
|
||||||
|
|
||||||
|
import org.apache.lucene.util.Version;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.analysis.miscellaneous.LengthFilter;
|
import org.apache.lucene.analysis.miscellaneous.LengthFilter;
|
||||||
import org.elasticsearch.common.inject.Inject;
|
import org.elasticsearch.common.inject.Inject;
|
||||||
|
@ -35,18 +39,25 @@ public class LengthTokenFilterFactory extends AbstractTokenFilterFactory {
|
||||||
private final int min;
|
private final int min;
|
||||||
private final int max;
|
private final int max;
|
||||||
private final boolean enablePositionIncrements;
|
private final boolean enablePositionIncrements;
|
||||||
|
private static final String ENABLE_POS_INC_KEY = "enable_position_increments";
|
||||||
|
|
||||||
@Inject
|
@Inject
|
||||||
public LengthTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
public LengthTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||||
super(index, indexSettings, name, settings);
|
super(index, indexSettings, name, settings);
|
||||||
min = settings.getAsInt("min", 0);
|
min = settings.getAsInt("min", 0);
|
||||||
max = settings.getAsInt("max", Integer.MAX_VALUE);
|
max = settings.getAsInt("max", Integer.MAX_VALUE);
|
||||||
enablePositionIncrements = settings.getAsBoolean("enabled_position_increments", true);
|
if (version.onOrAfter(Version.LUCENE_44) && settings.get(ENABLE_POS_INC_KEY) != null) {
|
||||||
|
throw new ElasticSearchIllegalArgumentException(ENABLE_POS_INC_KEY + " is not supported anymore. Please fix your analysis chain or use"
|
||||||
|
+ " an older compatibility version (<=4.3) but beware that it might cause highlighting bugs.");
|
||||||
|
}
|
||||||
|
enablePositionIncrements = version.onOrAfter(Version.LUCENE_44) ? true : settings.getAsBoolean(ENABLE_POS_INC_KEY, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public TokenStream create(TokenStream tokenStream) {
|
public TokenStream create(TokenStream tokenStream) {
|
||||||
return new LengthFilter(enablePositionIncrements, tokenStream, min, max);
|
if (version.onOrAfter(Version.LUCENE_44)) {
|
||||||
|
return new LengthFilter(version, tokenStream, min, max);
|
||||||
|
}
|
||||||
|
return new LengthFilter(version, enablePositionIncrements, tokenStream, min, max);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -21,7 +21,6 @@ package org.elasticsearch.index.analysis;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.analysis.ngram.NGramTokenFilter;
|
import org.apache.lucene.analysis.ngram.NGramTokenFilter;
|
||||||
import org.apache.lucene.analysis.ngram.XNGramTokenFilter;
|
|
||||||
import org.apache.lucene.util.Version;
|
import org.apache.lucene.util.Version;
|
||||||
import org.elasticsearch.common.inject.Inject;
|
import org.elasticsearch.common.inject.Inject;
|
||||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||||
|
@ -49,10 +48,7 @@ public class NGramTokenFilterFactory extends AbstractTokenFilterFactory {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public TokenStream create(TokenStream tokenStream) {
|
public TokenStream create(TokenStream tokenStream) {
|
||||||
if (this.version.onOrAfter(Version.LUCENE_43)) {
|
final Version version = this.version == Version.LUCENE_43 ? Version.LUCENE_44 : this.version; // we supported it since 4.3
|
||||||
// LUCENE MONITOR: this token filter is a copy from lucene trunk and should go away once we upgrade to lucene 4.4
|
return new NGramTokenFilter(version, tokenStream, minGram, maxGram);
|
||||||
return new XNGramTokenFilter(version, tokenStream, minGram, maxGram);
|
|
||||||
}
|
|
||||||
return new NGramTokenFilter(tokenStream, minGram, maxGram);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
|
@ -19,10 +19,11 @@
|
||||||
|
|
||||||
package org.elasticsearch.index.analysis;
|
package org.elasticsearch.index.analysis;
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.ngram.Lucene43NGramTokenizer;
|
||||||
|
|
||||||
import com.google.common.collect.ImmutableMap;
|
import com.google.common.collect.ImmutableMap;
|
||||||
import org.apache.lucene.analysis.Tokenizer;
|
import org.apache.lucene.analysis.Tokenizer;
|
||||||
import org.apache.lucene.analysis.ngram.NGramTokenizer;
|
import org.apache.lucene.analysis.ngram.NGramTokenizer;
|
||||||
import org.apache.lucene.analysis.ngram.XNGramTokenizer;
|
|
||||||
import org.apache.lucene.util.Version;
|
import org.apache.lucene.util.Version;
|
||||||
import org.elasticsearch.ElasticSearchIllegalArgumentException;
|
import org.elasticsearch.ElasticSearchIllegalArgumentException;
|
||||||
import org.elasticsearch.common.inject.Inject;
|
import org.elasticsearch.common.inject.Inject;
|
||||||
|
@ -98,20 +99,21 @@ public class NGramTokenizerFactory extends AbstractTokenizerFactory {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Tokenizer create(Reader reader) {
|
public Tokenizer create(Reader reader) {
|
||||||
if (this.version.onOrAfter(Version.LUCENE_43)) {
|
final Version version = this.version == Version.LUCENE_43 ? Version.LUCENE_44 : this.version; // we supported it since 4.3
|
||||||
// LUCENE MONITOR: this token filter is a copy from lucene trunk and should go away once we upgrade to lucene 4.4
|
if (version.onOrAfter(Version.LUCENE_44)) {
|
||||||
if (matcher == null) {
|
if (matcher == null) {
|
||||||
return new XNGramTokenizer(version, reader, minGram, maxGram);
|
return new NGramTokenizer(version, reader, minGram, maxGram);
|
||||||
} else {
|
} else {
|
||||||
return new XNGramTokenizer(version, reader, minGram, maxGram) {
|
return new NGramTokenizer(version, reader, minGram, maxGram) {
|
||||||
@Override
|
@Override
|
||||||
protected boolean isTokenChar(int chr) {
|
protected boolean isTokenChar(int chr) {
|
||||||
return matcher.isTokenChar(chr);
|
return matcher.isTokenChar(chr);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
return new Lucene43NGramTokenizer(reader, minGram, maxGram);
|
||||||
}
|
}
|
||||||
return new NGramTokenizer(reader, minGram, maxGram);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
|
@ -19,6 +19,9 @@
|
||||||
|
|
||||||
package org.elasticsearch.index.analysis;
|
package org.elasticsearch.index.analysis;
|
||||||
|
|
||||||
|
import org.apache.lucene.util.Version;
|
||||||
|
import org.elasticsearch.ElasticSearchIllegalArgumentException;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.analysis.miscellaneous.TrimFilter;
|
import org.apache.lucene.analysis.miscellaneous.TrimFilter;
|
||||||
import org.elasticsearch.common.inject.Inject;
|
import org.elasticsearch.common.inject.Inject;
|
||||||
|
@ -34,15 +37,23 @@ import org.elasticsearch.index.settings.IndexSettings;
|
||||||
public class TrimTokenFilterFactory extends AbstractTokenFilterFactory {
|
public class TrimTokenFilterFactory extends AbstractTokenFilterFactory {
|
||||||
|
|
||||||
private final boolean updateOffsets;
|
private final boolean updateOffsets;
|
||||||
|
private static final String UPDATE_OFFSETS_KEY = "update_offsets";
|
||||||
|
|
||||||
@Inject
|
@Inject
|
||||||
public TrimTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) {
|
public TrimTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) {
|
||||||
super(index, indexSettings, name, settings);
|
super(index, indexSettings, name, settings);
|
||||||
|
if (version.onOrAfter(Version.LUCENE_44) && settings.get(UPDATE_OFFSETS_KEY) != null) {
|
||||||
|
throw new ElasticSearchIllegalArgumentException(UPDATE_OFFSETS_KEY + " is not supported anymore. Please fix your analysis chain or use"
|
||||||
|
+ " an older compatibility version (<=4.3) but beware that it might cause highlighting bugs.");
|
||||||
|
}
|
||||||
this.updateOffsets = settings.getAsBoolean("update_offsets", false);
|
this.updateOffsets = settings.getAsBoolean("update_offsets", false);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public TokenStream create(TokenStream tokenStream) {
|
public TokenStream create(TokenStream tokenStream) {
|
||||||
return new TrimFilter(tokenStream, updateOffsets);
|
if (version.onOrAfter(Version.LUCENE_44)) {
|
||||||
|
return new TrimFilter(version, tokenStream);
|
||||||
|
}
|
||||||
|
return new TrimFilter(version, tokenStream, updateOffsets);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -186,7 +186,7 @@ public class WeightedFilterCache extends AbstractIndexComponent implements Filte
|
||||||
// note, we don't wrap the return value with a BitsFilteredDocIdSet.wrap(docIdSet, acceptDocs) because
|
// note, we don't wrap the return value with a BitsFilteredDocIdSet.wrap(docIdSet, acceptDocs) because
|
||||||
// we rely on our custom XFilteredQuery to do the wrapping if needed, so we don't have to wrap each
|
// we rely on our custom XFilteredQuery to do the wrapping if needed, so we don't have to wrap each
|
||||||
// filter on its own
|
// filter on its own
|
||||||
return cacheValue == DocIdSet.EMPTY_DOCIDSET ? null : cacheValue;
|
return DocIdSets.isEmpty(cacheValue) ? null : cacheValue;
|
||||||
}
|
}
|
||||||
|
|
||||||
public String toString() {
|
public String toString() {
|
||||||
|
|
|
@ -66,14 +66,18 @@ public class SnapshotDeletionPolicy extends AbstractESDeletionPolicy {
|
||||||
* Called by Lucene. Same as {@link #onCommit(java.util.List)}.
|
* Called by Lucene. Same as {@link #onCommit(java.util.List)}.
|
||||||
*/
|
*/
|
||||||
public void onInit(List<? extends IndexCommit> commits) throws IOException {
|
public void onInit(List<? extends IndexCommit> commits) throws IOException {
|
||||||
|
if (!commits.isEmpty()) { // this might be empty if we create a new index.
|
||||||
|
// the behavior changed in Lucene 4.4, which now calls onInit even with an empty commits list.
|
||||||
onCommit(commits);
|
onCommit(commits);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Called by Lucene. Wraps the provided commits with {@link SnapshotIndexCommit}
|
* Called by Lucene. Wraps the provided commits with {@link SnapshotIndexCommit}
|
||||||
* and delegates to the wrapped deletion policy.
|
* and delegates to the wrapped deletion policy.
|
||||||
*/
|
*/
|
||||||
public void onCommit(List<? extends IndexCommit> commits) throws IOException {
|
public void onCommit(List<? extends IndexCommit> commits) throws IOException {
|
||||||
|
assert !commits.isEmpty() : "Commits must not be empty";
|
||||||
synchronized (mutex) {
|
synchronized (mutex) {
|
||||||
List<SnapshotIndexCommit> snapshotCommits = wrapCommits(commits);
|
List<SnapshotIndexCommit> snapshotCommits = wrapCommits(commits);
|
||||||
primary.onCommit(snapshotCommits);
|
primary.onCommit(snapshotCommits);
|
||||||
|
@ -95,6 +99,7 @@ public class SnapshotDeletionPolicy extends AbstractESDeletionPolicy {
|
||||||
this.commits = newCommits;
|
this.commits = newCommits;
|
||||||
// the last commit that is not deleted
|
// the last commit that is not deleted
|
||||||
this.lastCommit = newCommits.get(newCommits.size() - 1);
|
this.lastCommit = newCommits.get(newCommits.size() - 1);
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1273,7 +1273,6 @@ public class RobinEngine extends AbstractIndexShardComponent implements Engine {
|
||||||
config.setReaderTermsIndexDivisor(termIndexDivisor);
|
config.setReaderTermsIndexDivisor(termIndexDivisor);
|
||||||
config.setMaxThreadStates(indexConcurrency);
|
config.setMaxThreadStates(indexConcurrency);
|
||||||
config.setCodec(codecService.codec(codecName));
|
config.setCodec(codecService.codec(codecName));
|
||||||
|
|
||||||
indexWriter = new IndexWriter(store.directory(), config);
|
indexWriter = new IndexWriter(store.directory(), config);
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
safeClose(indexWriter);
|
safeClose(indexWriter);
|
||||||
|
|
|
@ -19,10 +19,6 @@
|
||||||
|
|
||||||
package org.elasticsearch.index.fielddata.ordinals;
|
package org.elasticsearch.index.fielddata.ordinals;
|
||||||
|
|
||||||
import org.apache.lucene.util.Version;
|
|
||||||
|
|
||||||
import org.elasticsearch.common.lucene.Lucene;
|
|
||||||
|
|
||||||
import org.apache.lucene.util.ArrayUtil;
|
import org.apache.lucene.util.ArrayUtil;
|
||||||
import org.apache.lucene.util.LongsRef;
|
import org.apache.lucene.util.LongsRef;
|
||||||
import org.apache.lucene.util.packed.AppendingLongBuffer;
|
import org.apache.lucene.util.packed.AppendingLongBuffer;
|
||||||
|
@ -34,11 +30,8 @@ import org.elasticsearch.index.fielddata.ordinals.Ordinals.Docs.Iter;
|
||||||
/** {@link Ordinals} implementation which is efficient at storing field data ordinals for multi-valued or sparse fields. */
|
/** {@link Ordinals} implementation which is efficient at storing field data ordinals for multi-valued or sparse fields. */
|
||||||
public class MultiOrdinals implements Ordinals {
|
public class MultiOrdinals implements Ordinals {
|
||||||
|
|
||||||
// hard-coded in Lucene 4.3 but will be exposed in Lucene 4.4
|
|
||||||
static {
|
|
||||||
assert Lucene.VERSION == Version.LUCENE_43;
|
|
||||||
}
|
|
||||||
private static final int OFFSETS_PAGE_SIZE = 1024;
|
private static final int OFFSETS_PAGE_SIZE = 1024;
|
||||||
|
private static final int OFFSET_INIT_PAGE_COUNT = 16;
|
||||||
|
|
||||||
/** Return true if this impl is going to be smaller than {@link SinglePackedOrdinals} by at least 20%. */
|
/** Return true if this impl is going to be smaller than {@link SinglePackedOrdinals} by at least 20%. */
|
||||||
public static boolean significantlySmallerThanSinglePackedOrdinals(int maxDoc, int numDocsWithValue, long numOrds) {
|
public static boolean significantlySmallerThanSinglePackedOrdinals(int maxDoc, int numDocsWithValue, long numOrds) {
|
||||||
|
@ -62,7 +55,7 @@ public class MultiOrdinals implements Ordinals {
|
||||||
multiValued = builder.getNumMultiValuesDocs() > 0;
|
multiValued = builder.getNumMultiValuesDocs() > 0;
|
||||||
numOrds = builder.getNumOrds();
|
numOrds = builder.getNumOrds();
|
||||||
endOffsets = new MonotonicAppendingLongBuffer();
|
endOffsets = new MonotonicAppendingLongBuffer();
|
||||||
ords = new AppendingLongBuffer();
|
ords = new AppendingLongBuffer(OFFSET_INIT_PAGE_COUNT, OFFSETS_PAGE_SIZE);
|
||||||
long lastEndOffset = 0;
|
long lastEndOffset = 0;
|
||||||
for (int i = 0; i < builder.maxDoc(); ++i) {
|
for (int i = 0; i < builder.maxDoc(); ++i) {
|
||||||
final LongsRef docOrds = builder.docOrds(i);
|
final LongsRef docOrds = builder.docOrds(i);
|
||||||
|
|
|
@ -25,7 +25,7 @@ import org.apache.lucene.index.TermsEnum;
|
||||||
import org.apache.lucene.util.*;
|
import org.apache.lucene.util.*;
|
||||||
import org.apache.lucene.util.packed.GrowableWriter;
|
import org.apache.lucene.util.packed.GrowableWriter;
|
||||||
import org.apache.lucene.util.packed.PackedInts;
|
import org.apache.lucene.util.packed.PackedInts;
|
||||||
import org.apache.lucene.util.packed.XPagedGrowableWriter;
|
import org.apache.lucene.util.packed.PagedGrowableWriter;
|
||||||
import org.elasticsearch.common.settings.Settings;
|
import org.elasticsearch.common.settings.Settings;
|
||||||
|
|
||||||
import java.io.Closeable;
|
import java.io.Closeable;
|
||||||
|
@ -117,13 +117,13 @@ public final class OrdinalsBuilder implements Closeable {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Current position
|
// Current position
|
||||||
private XPagedGrowableWriter positions;
|
private PagedGrowableWriter positions;
|
||||||
// First level (0) of ordinals and pointers to the next level
|
// First level (0) of ordinals and pointers to the next level
|
||||||
private final GrowableWriter firstOrdinals;
|
private final GrowableWriter firstOrdinals;
|
||||||
private XPagedGrowableWriter firstNextLevelSlices;
|
private PagedGrowableWriter firstNextLevelSlices;
|
||||||
// Ordinals and pointers for other levels, starting at 1
|
// Ordinals and pointers for other levels, starting at 1
|
||||||
private final XPagedGrowableWriter[] ordinals;
|
private final PagedGrowableWriter[] ordinals;
|
||||||
private final XPagedGrowableWriter[] nextLevelSlices;
|
private final PagedGrowableWriter[] nextLevelSlices;
|
||||||
private final int[] sizes;
|
private final int[] sizes;
|
||||||
|
|
||||||
private final int startBitsPerValue;
|
private final int startBitsPerValue;
|
||||||
|
@ -132,11 +132,11 @@ public final class OrdinalsBuilder implements Closeable {
|
||||||
OrdinalsStore(int maxDoc, int startBitsPerValue, float acceptableOverheadRatio) {
|
OrdinalsStore(int maxDoc, int startBitsPerValue, float acceptableOverheadRatio) {
|
||||||
this.startBitsPerValue = startBitsPerValue;
|
this.startBitsPerValue = startBitsPerValue;
|
||||||
this.acceptableOverheadRatio = acceptableOverheadRatio;
|
this.acceptableOverheadRatio = acceptableOverheadRatio;
|
||||||
positions = new XPagedGrowableWriter(maxDoc, PAGE_SIZE, startBitsPerValue, acceptableOverheadRatio);
|
positions = new PagedGrowableWriter(maxDoc, PAGE_SIZE, startBitsPerValue, acceptableOverheadRatio);
|
||||||
firstOrdinals = new GrowableWriter(startBitsPerValue, maxDoc, acceptableOverheadRatio);
|
firstOrdinals = new GrowableWriter(startBitsPerValue, maxDoc, acceptableOverheadRatio);
|
||||||
// over-allocate in order to never worry about the array sizes; 24 entries would allow storing several million ordinals per doc...
|
// over-allocate in order to never worry about the array sizes; 24 entries would allow storing several million ordinals per doc...
|
||||||
ordinals = new XPagedGrowableWriter[24];
|
ordinals = new PagedGrowableWriter[24];
|
||||||
nextLevelSlices = new XPagedGrowableWriter[24];
|
nextLevelSlices = new PagedGrowableWriter[24];
|
||||||
sizes = new int[24];
|
sizes = new int[24];
|
||||||
Arrays.fill(sizes, 1); // reserve the 1st slice on every level
|
Arrays.fill(sizes, 1); // reserve the 1st slice on every level
|
||||||
}
|
}
|
||||||
|
@ -146,7 +146,7 @@ public final class OrdinalsBuilder implements Closeable {
|
||||||
final long newSlice = sizes[level]++;
|
final long newSlice = sizes[level]++;
|
||||||
// Lazily allocate ordinals
|
// Lazily allocate ordinals
|
||||||
if (ordinals[level] == null) {
|
if (ordinals[level] == null) {
|
||||||
ordinals[level] = new XPagedGrowableWriter(8L * numSlots(level), PAGE_SIZE, startBitsPerValue, acceptableOverheadRatio);
|
ordinals[level] = new PagedGrowableWriter(8L * numSlots(level), PAGE_SIZE, startBitsPerValue, acceptableOverheadRatio);
|
||||||
} else {
|
} else {
|
||||||
ordinals[level] = ordinals[level].grow(sizes[level] * numSlots(level));
|
ordinals[level] = ordinals[level].grow(sizes[level] * numSlots(level));
|
||||||
if (nextLevelSlices[level] != null) {
|
if (nextLevelSlices[level] != null) {
|
||||||
|
@ -167,7 +167,7 @@ public final class OrdinalsBuilder implements Closeable {
|
||||||
} else {
|
} else {
|
||||||
final long newSlice = newSlice(1);
|
final long newSlice = newSlice(1);
|
||||||
if (firstNextLevelSlices == null) {
|
if (firstNextLevelSlices == null) {
|
||||||
firstNextLevelSlices = new XPagedGrowableWriter(firstOrdinals.size(), PAGE_SIZE, 3, acceptableOverheadRatio);
|
firstNextLevelSlices = new PagedGrowableWriter(firstOrdinals.size(), PAGE_SIZE, 3, acceptableOverheadRatio);
|
||||||
}
|
}
|
||||||
firstNextLevelSlices.set(docID, newSlice);
|
firstNextLevelSlices.set(docID, newSlice);
|
||||||
final long offset = startOffset(1, newSlice);
|
final long offset = startOffset(1, newSlice);
|
||||||
|
@ -183,7 +183,7 @@ public final class OrdinalsBuilder implements Closeable {
|
||||||
// reached the end of the slice, allocate a new one on the next level
|
// reached the end of the slice, allocate a new one on the next level
|
||||||
final long newSlice = newSlice(level + 1);
|
final long newSlice = newSlice(level + 1);
|
||||||
if (nextLevelSlices[level] == null) {
|
if (nextLevelSlices[level] == null) {
|
||||||
nextLevelSlices[level] = new XPagedGrowableWriter(sizes[level], PAGE_SIZE, 1, acceptableOverheadRatio);
|
nextLevelSlices[level] = new PagedGrowableWriter(sizes[level], PAGE_SIZE, 1, acceptableOverheadRatio);
|
||||||
}
|
}
|
||||||
nextLevelSlices[level].set(sliceID(level, offset), newSlice);
|
nextLevelSlices[level].set(sliceID(level, offset), newSlice);
|
||||||
++level;
|
++level;
|
||||||
|
|
|
@ -60,7 +60,7 @@ public class FSTBytesIndexFieldData extends AbstractBytesIndexFieldData<FSTBytes
|
||||||
if (terms == null) {
|
if (terms == null) {
|
||||||
return FSTBytesAtomicFieldData.empty(reader.maxDoc());
|
return FSTBytesAtomicFieldData.empty(reader.maxDoc());
|
||||||
}
|
}
|
||||||
PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
|
PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
|
||||||
org.apache.lucene.util.fst.Builder<Long> fstBuilder = new org.apache.lucene.util.fst.Builder<Long>(INPUT_TYPE.BYTE1, outputs);
|
org.apache.lucene.util.fst.Builder<Long> fstBuilder = new org.apache.lucene.util.fst.Builder<Long>(INPUT_TYPE.BYTE1, outputs);
|
||||||
final IntsRef scratch = new IntsRef();
|
final IntsRef scratch = new IntsRef();
|
||||||
|
|
||||||
|
|
|
@ -40,7 +40,7 @@ public class CommitPoints implements Iterable<CommitPoint> {
|
||||||
private final ImmutableList<CommitPoint> commitPoints;
|
private final ImmutableList<CommitPoint> commitPoints;
|
||||||
|
|
||||||
public CommitPoints(List<CommitPoint> commitPoints) {
|
public CommitPoints(List<CommitPoint> commitPoints) {
|
||||||
CollectionUtil.quickSort(commitPoints, new Comparator<CommitPoint>() {
|
CollectionUtil.introSort(commitPoints, new Comparator<CommitPoint>() {
|
||||||
@Override
|
@Override
|
||||||
public int compare(CommitPoint o1, CommitPoint o2) {
|
public int compare(CommitPoint o1, CommitPoint o2) {
|
||||||
return (o2.version() < o1.version() ? -1 : (o2.version() == o1.version() ? 0 : 1));
|
return (o2.version() < o1.version() ? -1 : (o2.version() == o1.version() ? 0 : 1));
|
||||||
|
|
|
@ -27,13 +27,11 @@ public abstract class AbstractMergePolicyProvider<MP extends MergePolicy> extend
|
||||||
|
|
||||||
public static final String INDEX_COMPOUND_FORMAT = "index.compound_format";
|
public static final String INDEX_COMPOUND_FORMAT = "index.compound_format";
|
||||||
|
|
||||||
protected volatile boolean compoundFormat;
|
|
||||||
protected volatile double noCFSRatio;
|
protected volatile double noCFSRatio;
|
||||||
|
|
||||||
protected AbstractMergePolicyProvider(Store store) {
|
protected AbstractMergePolicyProvider(Store store) {
|
||||||
super(store.shardId(), store.indexSettings());
|
super(store.shardId(), store.indexSettings());
|
||||||
this.noCFSRatio = parseNoCFSRatio(indexSettings.get(INDEX_COMPOUND_FORMAT, Boolean.toString(store.suggestUseCompoundFile())));
|
this.noCFSRatio = parseNoCFSRatio(indexSettings.get(INDEX_COMPOUND_FORMAT, Boolean.toString(store.suggestUseCompoundFile())));
|
||||||
this.compoundFormat = noCFSRatio != 0.0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public static double parseNoCFSRatio(String noCFSRatio) {
|
public static double parseNoCFSRatio(String noCFSRatio) {
|
||||||
|
|
|
@ -83,7 +83,6 @@ public class LogByteSizeMergePolicyProvider extends AbstractMergePolicyProvider<
|
||||||
mergePolicy.setMergeFactor(mergeFactor);
|
mergePolicy.setMergeFactor(mergeFactor);
|
||||||
mergePolicy.setMaxMergeDocs(maxMergeDocs);
|
mergePolicy.setMaxMergeDocs(maxMergeDocs);
|
||||||
mergePolicy.setCalibrateSizeByDeletes(calibrateSizeByDeletes);
|
mergePolicy.setCalibrateSizeByDeletes(calibrateSizeByDeletes);
|
||||||
mergePolicy.setUseCompoundFile(compoundFormat);
|
|
||||||
mergePolicy.setNoCFSRatio(noCFSRatio);
|
mergePolicy.setNoCFSRatio(noCFSRatio);
|
||||||
|
|
||||||
policies.add(mergePolicy);
|
policies.add(mergePolicy);
|
||||||
|
@ -140,14 +139,11 @@ public class LogByteSizeMergePolicyProvider extends AbstractMergePolicyProvider<
|
||||||
}
|
}
|
||||||
|
|
||||||
final double noCFSRatio = parseNoCFSRatio(settings.get(INDEX_COMPOUND_FORMAT, Double.toString(LogByteSizeMergePolicyProvider.this.noCFSRatio)));
|
final double noCFSRatio = parseNoCFSRatio(settings.get(INDEX_COMPOUND_FORMAT, Double.toString(LogByteSizeMergePolicyProvider.this.noCFSRatio)));
|
||||||
final boolean compoundFormat = noCFSRatio != 0.0;
|
|
||||||
if (noCFSRatio != LogByteSizeMergePolicyProvider.this.noCFSRatio) {
|
if (noCFSRatio != LogByteSizeMergePolicyProvider.this.noCFSRatio) {
|
||||||
logger.info("updating index.compound_format from [{}] to [{}]", formatNoCFSRatio(LogByteSizeMergePolicyProvider.this.noCFSRatio), formatNoCFSRatio(noCFSRatio));
|
logger.info("updating index.compound_format from [{}] to [{}]", formatNoCFSRatio(LogByteSizeMergePolicyProvider.this.noCFSRatio), formatNoCFSRatio(noCFSRatio));
|
||||||
LogByteSizeMergePolicyProvider.this.compoundFormat = compoundFormat;
|
|
||||||
LogByteSizeMergePolicyProvider.this.noCFSRatio = noCFSRatio;
|
LogByteSizeMergePolicyProvider.this.noCFSRatio = noCFSRatio;
|
||||||
for (CustomLogByteSizeMergePolicy policy : policies) {
|
for (CustomLogByteSizeMergePolicy policy : policies) {
|
||||||
policy.setNoCFSRatio(noCFSRatio);
|
policy.setNoCFSRatio(noCFSRatio);
|
||||||
policy.setUseCompoundFile(compoundFormat);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -86,7 +86,6 @@ public class LogDocMergePolicyProvider extends AbstractMergePolicyProvider<LogDo
|
||||||
mergePolicy.setMaxMergeDocs(maxMergeDocs);
|
mergePolicy.setMaxMergeDocs(maxMergeDocs);
|
||||||
mergePolicy.setMergeFactor(mergeFactor);
|
mergePolicy.setMergeFactor(mergeFactor);
|
||||||
mergePolicy.setCalibrateSizeByDeletes(calibrateSizeByDeletes);
|
mergePolicy.setCalibrateSizeByDeletes(calibrateSizeByDeletes);
|
||||||
mergePolicy.setUseCompoundFile(compoundFormat);
|
|
||||||
mergePolicy.setNoCFSRatio(noCFSRatio);
|
mergePolicy.setNoCFSRatio(noCFSRatio);
|
||||||
policies.add(mergePolicy);
|
policies.add(mergePolicy);
|
||||||
return mergePolicy;
|
return mergePolicy;
|
||||||
|
@ -130,11 +129,9 @@ public class LogDocMergePolicyProvider extends AbstractMergePolicyProvider<LogDo
|
||||||
final boolean compoundFormat = noCFSRatio != 0.0;
|
final boolean compoundFormat = noCFSRatio != 0.0;
|
||||||
if (noCFSRatio != LogDocMergePolicyProvider.this.noCFSRatio) {
|
if (noCFSRatio != LogDocMergePolicyProvider.this.noCFSRatio) {
|
||||||
logger.info("updating index.compound_format from [{}] to [{}]", formatNoCFSRatio(LogDocMergePolicyProvider.this.noCFSRatio), formatNoCFSRatio(noCFSRatio));
|
logger.info("updating index.compound_format from [{}] to [{}]", formatNoCFSRatio(LogDocMergePolicyProvider.this.noCFSRatio), formatNoCFSRatio(noCFSRatio));
|
||||||
LogDocMergePolicyProvider.this.compoundFormat = compoundFormat;
|
|
||||||
LogDocMergePolicyProvider.this.noCFSRatio = noCFSRatio;
|
LogDocMergePolicyProvider.this.noCFSRatio = noCFSRatio;
|
||||||
for (CustomLogDocMergePolicy policy : policies) {
|
for (CustomLogDocMergePolicy policy : policies) {
|
||||||
policy.setNoCFSRatio(noCFSRatio);
|
policy.setNoCFSRatio(noCFSRatio);
|
||||||
policy.setUseCompoundFile(compoundFormat);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -99,7 +99,6 @@ public class TieredMergePolicyProvider extends AbstractMergePolicyProvider<Tiere
|
||||||
} else {
|
} else {
|
||||||
mergePolicy = new CustomTieredMergePolicyProvider(this);
|
mergePolicy = new CustomTieredMergePolicyProvider(this);
|
||||||
}
|
}
|
||||||
mergePolicy.setUseCompoundFile(compoundFormat);
|
|
||||||
mergePolicy.setNoCFSRatio(noCFSRatio);
|
mergePolicy.setNoCFSRatio(noCFSRatio);
|
||||||
mergePolicy.setForceMergeDeletesPctAllowed(forceMergeDeletesPctAllowed);
|
mergePolicy.setForceMergeDeletesPctAllowed(forceMergeDeletesPctAllowed);
|
||||||
mergePolicy.setFloorSegmentMB(floorSegment.mbFrac());
|
mergePolicy.setFloorSegmentMB(floorSegment.mbFrac());
|
||||||
|
@ -191,14 +190,11 @@ public class TieredMergePolicyProvider extends AbstractMergePolicyProvider<Tiere
|
||||||
}
|
}
|
||||||
|
|
||||||
final double noCFSRatio = parseNoCFSRatio(settings.get(INDEX_COMPOUND_FORMAT, Double.toString(TieredMergePolicyProvider.this.noCFSRatio)));
|
final double noCFSRatio = parseNoCFSRatio(settings.get(INDEX_COMPOUND_FORMAT, Double.toString(TieredMergePolicyProvider.this.noCFSRatio)));
|
||||||
final boolean compoundFormat = noCFSRatio != 0.0;
|
|
||||||
if (noCFSRatio != TieredMergePolicyProvider.this.noCFSRatio) {
|
if (noCFSRatio != TieredMergePolicyProvider.this.noCFSRatio) {
|
||||||
logger.info("updating index.compound_format from [{}] to [{}]", formatNoCFSRatio(TieredMergePolicyProvider.this.noCFSRatio), formatNoCFSRatio(noCFSRatio));
|
logger.info("updating index.compound_format from [{}] to [{}]", formatNoCFSRatio(TieredMergePolicyProvider.this.noCFSRatio), formatNoCFSRatio(noCFSRatio));
|
||||||
TieredMergePolicyProvider.this.compoundFormat = compoundFormat;
|
|
||||||
TieredMergePolicyProvider.this.noCFSRatio = noCFSRatio;
|
TieredMergePolicyProvider.this.noCFSRatio = noCFSRatio;
|
||||||
for (CustomTieredMergePolicyProvider policy : policies) {
|
for (CustomTieredMergePolicyProvider policy : policies) {
|
||||||
policy.setNoCFSRatio(noCFSRatio);
|
policy.setNoCFSRatio(noCFSRatio);
|
||||||
policy.setUseCompoundFile(compoundFormat);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -59,8 +59,7 @@ public class ConcurrentMergeSchedulerProvider extends MergeSchedulerProvider {
|
||||||
@Override
|
@Override
|
||||||
public MergeScheduler newMergeScheduler() {
|
public MergeScheduler newMergeScheduler() {
|
||||||
CustomConcurrentMergeScheduler concurrentMergeScheduler = new CustomConcurrentMergeScheduler(logger, shardId, this);
|
CustomConcurrentMergeScheduler concurrentMergeScheduler = new CustomConcurrentMergeScheduler(logger, shardId, this);
|
||||||
concurrentMergeScheduler.setMaxMergeCount(maxMergeCount);
|
concurrentMergeScheduler.setMaxMergesAndThreads(maxMergeCount, maxThreadCount);
|
||||||
concurrentMergeScheduler.setMaxThreadCount(maxThreadCount);
|
|
||||||
schedulers.add(concurrentMergeScheduler);
|
schedulers.add(concurrentMergeScheduler);
|
||||||
return concurrentMergeScheduler;
|
return concurrentMergeScheduler;
|
||||||
}
|
}
|
||||||
|
|
|
@ -30,6 +30,7 @@ import org.apache.lucene.util.BytesRef;
|
||||||
import org.apache.lucene.util.ToStringUtils;
|
import org.apache.lucene.util.ToStringUtils;
|
||||||
import org.elasticsearch.ElasticSearchIllegalStateException;
|
import org.elasticsearch.ElasticSearchIllegalStateException;
|
||||||
import org.elasticsearch.common.bytes.HashedBytesArray;
|
import org.elasticsearch.common.bytes.HashedBytesArray;
|
||||||
|
import org.elasticsearch.common.lucene.docset.DocIdSets;
|
||||||
import org.elasticsearch.common.lucene.search.ApplyAcceptedDocsFilter;
|
import org.elasticsearch.common.lucene.search.ApplyAcceptedDocsFilter;
|
||||||
import org.elasticsearch.common.lucene.search.Queries;
|
import org.elasticsearch.common.lucene.search.Queries;
|
||||||
import org.elasticsearch.common.lucene.search.TermFilter;
|
import org.elasticsearch.common.lucene.search.TermFilter;
|
||||||
|
@ -217,7 +218,7 @@ public class ChildrenQuery extends Query implements SearchContext.Rewrite {
|
||||||
@Override
|
@Override
|
||||||
public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder, boolean topScorer, Bits acceptDocs) throws IOException {
|
public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder, boolean topScorer, Bits acceptDocs) throws IOException {
|
||||||
DocIdSet parentsSet = parentFilter.getDocIdSet(context, acceptDocs);
|
DocIdSet parentsSet = parentFilter.getDocIdSet(context, acceptDocs);
|
||||||
if (parentsSet == null || parentsSet == DocIdSet.EMPTY_DOCIDSET || remaining == 0) {
|
if (DocIdSets.isEmpty(parentsSet) || remaining == 0) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -19,6 +19,8 @@
|
||||||
|
|
||||||
package org.elasticsearch.index.search.child;
|
package org.elasticsearch.index.search.child;
|
||||||
|
|
||||||
|
import org.elasticsearch.common.lucene.docset.DocIdSets;
|
||||||
|
|
||||||
import gnu.trove.map.hash.TObjectFloatHashMap;
|
import gnu.trove.map.hash.TObjectFloatHashMap;
|
||||||
import org.apache.lucene.index.AtomicReaderContext;
|
import org.apache.lucene.index.AtomicReaderContext;
|
||||||
import org.apache.lucene.index.IndexReader;
|
import org.apache.lucene.index.IndexReader;
|
||||||
|
@ -212,7 +214,7 @@ public class ParentQuery extends Query implements SearchContext.Rewrite {
|
||||||
@Override
|
@Override
|
||||||
public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder, boolean topScorer, Bits acceptDocs) throws IOException {
|
public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder, boolean topScorer, Bits acceptDocs) throws IOException {
|
||||||
DocIdSet childrenDocSet = childrenFilter.getDocIdSet(context, acceptDocs);
|
DocIdSet childrenDocSet = childrenFilter.getDocIdSet(context, acceptDocs);
|
||||||
if (childrenDocSet == null || childrenDocSet == DocIdSet.EMPTY_DOCIDSET) {
|
if (DocIdSets.isEmpty(childrenDocSet)) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
IdReaderTypeCache idTypeCache = searchContext.idCache().reader(context.reader()).type(parentType);
|
IdReaderTypeCache idTypeCache = searchContext.idCache().reader(context.reader()).type(parentType);
|
||||||
|
|
|
@ -82,7 +82,6 @@ import org.apache.lucene.analysis.th.ThaiAnalyzer;
|
||||||
import org.apache.lucene.analysis.tr.TurkishAnalyzer;
|
import org.apache.lucene.analysis.tr.TurkishAnalyzer;
|
||||||
import org.apache.lucene.analysis.util.ElisionFilter;
|
import org.apache.lucene.analysis.util.ElisionFilter;
|
||||||
import org.apache.lucene.analysis.util.CharArraySet;
|
import org.apache.lucene.analysis.util.CharArraySet;
|
||||||
import org.elasticsearch.ElasticSearchIllegalStateException;
|
|
||||||
import org.elasticsearch.common.component.AbstractComponent;
|
import org.elasticsearch.common.component.AbstractComponent;
|
||||||
import org.elasticsearch.common.inject.Inject;
|
import org.elasticsearch.common.inject.Inject;
|
||||||
import org.elasticsearch.common.lucene.Lucene;
|
import org.elasticsearch.common.lucene.Lucene;
|
||||||
|
@ -91,7 +90,6 @@ import org.elasticsearch.common.settings.Settings;
|
||||||
import org.elasticsearch.common.util.concurrent.ConcurrentCollections;
|
import org.elasticsearch.common.util.concurrent.ConcurrentCollections;
|
||||||
import org.elasticsearch.index.analysis.*;
|
import org.elasticsearch.index.analysis.*;
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.io.Reader;
|
import java.io.Reader;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
|
@ -268,7 +266,7 @@ public class IndicesAnalysisService extends AbstractComponent {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Tokenizer create(Reader reader) {
|
public Tokenizer create(Reader reader) {
|
||||||
return new NGramTokenizer(reader);
|
return new NGramTokenizer(Lucene.ANALYZER_VERSION, reader);
|
||||||
}
|
}
|
||||||
}));
|
}));
|
||||||
|
|
||||||
|
@ -280,7 +278,7 @@ public class IndicesAnalysisService extends AbstractComponent {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Tokenizer create(Reader reader) {
|
public Tokenizer create(Reader reader) {
|
||||||
return new NGramTokenizer(reader);
|
return new NGramTokenizer(Lucene.ANALYZER_VERSION, reader);
|
||||||
}
|
}
|
||||||
}));
|
}));
|
||||||
|
|
||||||
|
@ -292,7 +290,7 @@ public class IndicesAnalysisService extends AbstractComponent {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Tokenizer create(Reader reader) {
|
public Tokenizer create(Reader reader) {
|
||||||
return new EdgeNGramTokenizer(reader, EdgeNGramTokenizer.DEFAULT_SIDE, EdgeNGramTokenizer.DEFAULT_MIN_GRAM_SIZE, EdgeNGramTokenizer.DEFAULT_MAX_GRAM_SIZE);
|
return new EdgeNGramTokenizer(Lucene.ANALYZER_VERSION, reader, EdgeNGramTokenizer.DEFAULT_MIN_GRAM_SIZE, EdgeNGramTokenizer.DEFAULT_MAX_GRAM_SIZE);
|
||||||
}
|
}
|
||||||
}));
|
}));
|
||||||
|
|
||||||
|
@ -304,7 +302,7 @@ public class IndicesAnalysisService extends AbstractComponent {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Tokenizer create(Reader reader) {
|
public Tokenizer create(Reader reader) {
|
||||||
return new EdgeNGramTokenizer(reader, EdgeNGramTokenizer.DEFAULT_SIDE, EdgeNGramTokenizer.DEFAULT_MIN_GRAM_SIZE, EdgeNGramTokenizer.DEFAULT_MAX_GRAM_SIZE);
|
return new EdgeNGramTokenizer(Lucene.ANALYZER_VERSION, reader, EdgeNGramTokenizer.DEFAULT_MIN_GRAM_SIZE, EdgeNGramTokenizer.DEFAULT_MAX_GRAM_SIZE);
|
||||||
}
|
}
|
||||||
}));
|
}));
|
||||||
|
|
||||||
|
@ -357,7 +355,7 @@ public class IndicesAnalysisService extends AbstractComponent {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public TokenStream create(TokenStream tokenStream) {
|
public TokenStream create(TokenStream tokenStream) {
|
||||||
return new TrimFilter(tokenStream, false);
|
return new TrimFilter(Lucene.ANALYZER_VERSION, tokenStream);
|
||||||
}
|
}
|
||||||
}));
|
}));
|
||||||
|
|
||||||
|
@ -393,7 +391,7 @@ public class IndicesAnalysisService extends AbstractComponent {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public TokenStream create(TokenStream tokenStream) {
|
public TokenStream create(TokenStream tokenStream) {
|
||||||
return new LengthFilter(true, tokenStream, 0, Integer.MAX_VALUE);
|
return new LengthFilter(Lucene.ANALYZER_VERSION, tokenStream, 0, Integer.MAX_VALUE);
|
||||||
}
|
}
|
||||||
}));
|
}));
|
||||||
|
|
||||||
|
@ -477,7 +475,7 @@ public class IndicesAnalysisService extends AbstractComponent {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public TokenStream create(TokenStream tokenStream) {
|
public TokenStream create(TokenStream tokenStream) {
|
||||||
return new NGramTokenFilter(tokenStream);
|
return new NGramTokenFilter(Lucene.ANALYZER_VERSION, tokenStream);
|
||||||
}
|
}
|
||||||
}));
|
}));
|
||||||
|
|
||||||
|
@ -489,7 +487,7 @@ public class IndicesAnalysisService extends AbstractComponent {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public TokenStream create(TokenStream tokenStream) {
|
public TokenStream create(TokenStream tokenStream) {
|
||||||
return new NGramTokenFilter(tokenStream);
|
return new NGramTokenFilter(Lucene.ANALYZER_VERSION, tokenStream);
|
||||||
}
|
}
|
||||||
}));
|
}));
|
||||||
|
|
||||||
|
@ -501,7 +499,7 @@ public class IndicesAnalysisService extends AbstractComponent {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public TokenStream create(TokenStream tokenStream) {
|
public TokenStream create(TokenStream tokenStream) {
|
||||||
return new EdgeNGramTokenFilter(tokenStream, EdgeNGramTokenFilter.DEFAULT_SIDE, EdgeNGramTokenFilter.DEFAULT_MIN_GRAM_SIZE, EdgeNGramTokenFilter.DEFAULT_MAX_GRAM_SIZE);
|
return new EdgeNGramTokenFilter(Lucene.ANALYZER_VERSION, tokenStream, EdgeNGramTokenFilter.DEFAULT_MIN_GRAM_SIZE, EdgeNGramTokenFilter.DEFAULT_MAX_GRAM_SIZE);
|
||||||
}
|
}
|
||||||
}));
|
}));
|
||||||
|
|
||||||
|
@ -513,7 +511,7 @@ public class IndicesAnalysisService extends AbstractComponent {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public TokenStream create(TokenStream tokenStream) {
|
public TokenStream create(TokenStream tokenStream) {
|
||||||
return new EdgeNGramTokenFilter(tokenStream, EdgeNGramTokenFilter.DEFAULT_SIDE, EdgeNGramTokenFilter.DEFAULT_MIN_GRAM_SIZE, EdgeNGramTokenFilter.DEFAULT_MAX_GRAM_SIZE);
|
return new EdgeNGramTokenFilter(Lucene.ANALYZER_VERSION, tokenStream, EdgeNGramTokenFilter.DEFAULT_MIN_GRAM_SIZE, EdgeNGramTokenFilter.DEFAULT_MAX_GRAM_SIZE);
|
||||||
}
|
}
|
||||||
}));
|
}));
|
||||||
|
|
||||||
|
|
|
@ -131,7 +131,7 @@ public class HotThreads {
|
||||||
// sort by delta CPU time on thread.
|
// sort by delta CPU time on thread.
|
||||||
List<MyThreadInfo> hotties = new ArrayList<MyThreadInfo>(threadInfos.values());
|
List<MyThreadInfo> hotties = new ArrayList<MyThreadInfo>(threadInfos.values());
|
||||||
// skip that for now
|
// skip that for now
|
||||||
CollectionUtil.quickSort(hotties, new Comparator<MyThreadInfo>() {
|
CollectionUtil.introSort(hotties, new Comparator<MyThreadInfo>() {
|
||||||
public int compare(MyThreadInfo o1, MyThreadInfo o2) {
|
public int compare(MyThreadInfo o1, MyThreadInfo o2) {
|
||||||
if ("cpu".equals(type)) {
|
if ("cpu".equals(type)) {
|
||||||
return (int) (o2.cpuTime - o1.cpuTime);
|
return (int) (o2.cpuTime - o1.cpuTime);
|
||||||
|
|
|
@ -142,7 +142,7 @@ public class PlainHighlighter implements Highlighter {
|
||||||
throw new FetchPhaseExecutionException(context, "Failed to highlight field [" + highlighterContext.fieldName + "]", e);
|
throw new FetchPhaseExecutionException(context, "Failed to highlight field [" + highlighterContext.fieldName + "]", e);
|
||||||
}
|
}
|
||||||
if (field.scoreOrdered()) {
|
if (field.scoreOrdered()) {
|
||||||
CollectionUtil.quickSort(fragsList, new Comparator<TextFragment>() {
|
CollectionUtil.introSort(fragsList, new Comparator<TextFragment>() {
|
||||||
public int compare(TextFragment o1, TextFragment o2) {
|
public int compare(TextFragment o1, TextFragment o2) {
|
||||||
return Math.round(o2.getScore() - o1.getScore());
|
return Math.round(o2.getScore() - o1.getScore());
|
||||||
}
|
}
|
||||||
|
|
|
@ -57,7 +57,7 @@ public final class FragmentBuilderHelper {
|
||||||
* the FastVectorHighlighter. Yet, this is really a lucene problem and should be fixed in lucene rather
|
* the FastVectorHighlighter. Yet, this is really a lucene problem and should be fixed in lucene rather
|
||||||
* than in this hack... aka. "we are are working on in!" */
|
* than in this hack... aka. "we are are working on in!" */
|
||||||
final List<SubInfo> subInfos = fragInfo.getSubInfos();
|
final List<SubInfo> subInfos = fragInfo.getSubInfos();
|
||||||
CollectionUtil.quickSort(subInfos, new Comparator<SubInfo>() {
|
CollectionUtil.introSort(subInfos, new Comparator<SubInfo>() {
|
||||||
@Override
|
@Override
|
||||||
public int compare(SubInfo o1, SubInfo o2) {
|
public int compare(SubInfo o1, SubInfo o2) {
|
||||||
int startOffset = o1.getTermsOffsets().get(0).getStartOffset();
|
int startOffset = o1.getTermsOffsets().get(0).getStartOffset();
|
||||||
|
|
|
@ -17,22 +17,11 @@ package org.elasticsearch.search.rescore;
|
||||||
* specific language governing permissions and limitations
|
* specific language governing permissions and limitations
|
||||||
* under the License.
|
* under the License.
|
||||||
*/
|
*/
|
||||||
import java.io.IOException;
|
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.Set;
|
|
||||||
|
|
||||||
import org.apache.lucene.index.AtomicReaderContext;
|
import org.apache.lucene.index.AtomicReaderContext;
|
||||||
import org.apache.lucene.index.Term;
|
import org.apache.lucene.index.Term;
|
||||||
import org.apache.lucene.search.ComplexExplanation;
|
import org.apache.lucene.search.*;
|
||||||
import org.apache.lucene.search.DocIdSet;
|
|
||||||
import org.apache.lucene.search.DocIdSetIterator;
|
|
||||||
import org.apache.lucene.search.Explanation;
|
|
||||||
import org.apache.lucene.search.Filter;
|
|
||||||
import org.apache.lucene.search.Query;
|
|
||||||
import org.apache.lucene.search.ScoreDoc;
|
|
||||||
import org.apache.lucene.search.TopDocs;
|
|
||||||
import org.apache.lucene.util.Bits;
|
import org.apache.lucene.util.Bits;
|
||||||
import org.apache.lucene.util.SorterTemplate;
|
import org.apache.lucene.util.IntroSorter;
|
||||||
import org.elasticsearch.ElasticSearchIllegalArgumentException;
|
import org.elasticsearch.ElasticSearchIllegalArgumentException;
|
||||||
import org.elasticsearch.common.xcontent.XContentParser;
|
import org.elasticsearch.common.xcontent.XContentParser;
|
||||||
import org.elasticsearch.common.xcontent.XContentParser.Token;
|
import org.elasticsearch.common.xcontent.XContentParser.Token;
|
||||||
|
@ -40,6 +29,10 @@ import org.elasticsearch.index.query.ParsedQuery;
|
||||||
import org.elasticsearch.search.internal.ContextIndexSearcher;
|
import org.elasticsearch.search.internal.ContextIndexSearcher;
|
||||||
import org.elasticsearch.search.internal.SearchContext;
|
import org.elasticsearch.search.internal.SearchContext;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
final class QueryRescorer implements Rescorer {
|
final class QueryRescorer implements Rescorer {
|
||||||
|
|
||||||
public static final Rescorer INSTANCE = new QueryRescorer();
|
public static final Rescorer INSTANCE = new QueryRescorer();
|
||||||
|
@ -163,10 +156,10 @@ final class QueryRescorer implements Rescorer {
|
||||||
private TopDocs merge(TopDocs primary, TopDocs secondary, QueryRescoreContext context) {
|
private TopDocs merge(TopDocs primary, TopDocs secondary, QueryRescoreContext context) {
|
||||||
DocIdSorter sorter = new DocIdSorter();
|
DocIdSorter sorter = new DocIdSorter();
|
||||||
sorter.array = primary.scoreDocs;
|
sorter.array = primary.scoreDocs;
|
||||||
sorter.mergeSort(0, sorter.array.length-1);
|
sorter.sort(0, sorter.array.length);
|
||||||
ScoreDoc[] primaryDocs = sorter.array;
|
ScoreDoc[] primaryDocs = sorter.array;
|
||||||
sorter.array = secondary.scoreDocs;
|
sorter.array = secondary.scoreDocs;
|
||||||
sorter.mergeSort(0, sorter.array.length-1);
|
sorter.sort(0, sorter.array.length);
|
||||||
ScoreDoc[] secondaryDocs = sorter.array;
|
ScoreDoc[] secondaryDocs = sorter.array;
|
||||||
int j = 0;
|
int j = 0;
|
||||||
float primaryWeight = context.queryWeight();
|
float primaryWeight = context.queryWeight();
|
||||||
|
@ -180,12 +173,12 @@ final class QueryRescorer implements Rescorer {
|
||||||
}
|
}
|
||||||
ScoreSorter scoreSorter = new ScoreSorter();
|
ScoreSorter scoreSorter = new ScoreSorter();
|
||||||
scoreSorter.array = primaryDocs;
|
scoreSorter.array = primaryDocs;
|
||||||
scoreSorter.mergeSort(0, primaryDocs.length-1);
|
scoreSorter.sort(0, primaryDocs.length);
|
||||||
primary.setMaxScore(primaryDocs[0].score);
|
primary.setMaxScore(primaryDocs[0].score);
|
||||||
return primary;
|
return primary;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static final class DocIdSorter extends SorterTemplate {
|
private static final class DocIdSorter extends IntroSorter {
|
||||||
private ScoreDoc[] array;
|
private ScoreDoc[] array;
|
||||||
private ScoreDoc pivot;
|
private ScoreDoc pivot;
|
||||||
@Override
|
@Override
|
||||||
|
@ -222,7 +215,7 @@ final class QueryRescorer implements Rescorer {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static final class ScoreSorter extends SorterTemplate {
|
private static final class ScoreSorter extends IntroSorter {
|
||||||
private ScoreDoc[] array;
|
private ScoreDoc[] array;
|
||||||
private ScoreDoc pivot;
|
private ScoreDoc pivot;
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -5,13 +5,13 @@
|
||||||
"my_keep_filter":{
|
"my_keep_filter":{
|
||||||
"type":"keep",
|
"type":"keep",
|
||||||
"keep_words" : ["Hello", "worlD"],
|
"keep_words" : ["Hello", "worlD"],
|
||||||
"enable_position_increments" : true,
|
|
||||||
"keep_words_case" : true
|
"keep_words_case" : true
|
||||||
},
|
},
|
||||||
"my_case_sensitive_keep_filter":{
|
"my_case_sensitive_keep_filter":{
|
||||||
"type":"keep",
|
"type":"keep",
|
||||||
"keep_words" : ["Hello", "worlD"],
|
"keep_words" : ["Hello", "worlD"],
|
||||||
"enable_position_increments" : false
|
"enable_position_increments" : false,
|
||||||
|
"version" : "4.2"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -50,73 +50,40 @@ public class MergePolicySettingsTest {
|
||||||
public void testCompoundFileSettings() throws IOException {
|
public void testCompoundFileSettings() throws IOException {
|
||||||
IndexSettingsService service = new IndexSettingsService(new Index("test"), EMPTY_SETTINGS);
|
IndexSettingsService service = new IndexSettingsService(new Index("test"), EMPTY_SETTINGS);
|
||||||
|
|
||||||
assertThat(new TieredMergePolicyProvider(createStore(EMPTY_SETTINGS), service).newMergePolicy().getUseCompoundFile(), equalTo(false));
|
|
||||||
assertThat(new TieredMergePolicyProvider(createStore(EMPTY_SETTINGS), service).newMergePolicy().getNoCFSRatio(), equalTo(0.0));
|
assertThat(new TieredMergePolicyProvider(createStore(EMPTY_SETTINGS), service).newMergePolicy().getNoCFSRatio(), equalTo(0.0));
|
||||||
assertThat(new TieredMergePolicyProvider(createStore(build(true)), service).newMergePolicy().getUseCompoundFile(), equalTo(true));
|
|
||||||
assertThat(new TieredMergePolicyProvider(createStore(build(true)), service).newMergePolicy().getNoCFSRatio(), equalTo(1.0));
|
assertThat(new TieredMergePolicyProvider(createStore(build(true)), service).newMergePolicy().getNoCFSRatio(), equalTo(1.0));
|
||||||
assertThat(new TieredMergePolicyProvider(createStore(build(0.5)), service).newMergePolicy().getUseCompoundFile(), equalTo(true));
|
|
||||||
assertThat(new TieredMergePolicyProvider(createStore(build(0.5)), service).newMergePolicy().getNoCFSRatio(), equalTo(0.5));
|
assertThat(new TieredMergePolicyProvider(createStore(build(0.5)), service).newMergePolicy().getNoCFSRatio(), equalTo(0.5));
|
||||||
assertThat(new TieredMergePolicyProvider(createStore(build(1.0)), service).newMergePolicy().getUseCompoundFile(), equalTo(true));
|
|
||||||
assertThat(new TieredMergePolicyProvider(createStore(build(1.0)), service).newMergePolicy().getNoCFSRatio(), equalTo(1.0));
|
assertThat(new TieredMergePolicyProvider(createStore(build(1.0)), service).newMergePolicy().getNoCFSRatio(), equalTo(1.0));
|
||||||
assertThat(new TieredMergePolicyProvider(createStore(build("true")), service).newMergePolicy().getUseCompoundFile(), equalTo(true));
|
|
||||||
assertThat(new TieredMergePolicyProvider(createStore(build("true")), service).newMergePolicy().getNoCFSRatio(), equalTo(1.0));
|
assertThat(new TieredMergePolicyProvider(createStore(build("true")), service).newMergePolicy().getNoCFSRatio(), equalTo(1.0));
|
||||||
assertThat(new TieredMergePolicyProvider(createStore(build("True")), service).newMergePolicy().getUseCompoundFile(), equalTo(true));
|
|
||||||
assertThat(new TieredMergePolicyProvider(createStore(build("True")), service).newMergePolicy().getNoCFSRatio(), equalTo(1.0));
|
assertThat(new TieredMergePolicyProvider(createStore(build("True")), service).newMergePolicy().getNoCFSRatio(), equalTo(1.0));
|
||||||
assertThat(new TieredMergePolicyProvider(createStore(build("False")), service).newMergePolicy().getUseCompoundFile(), equalTo(false));
|
|
||||||
assertThat(new TieredMergePolicyProvider(createStore(build("False")), service).newMergePolicy().getNoCFSRatio(), equalTo(0.0));
|
assertThat(new TieredMergePolicyProvider(createStore(build("False")), service).newMergePolicy().getNoCFSRatio(), equalTo(0.0));
|
||||||
assertThat(new TieredMergePolicyProvider(createStore(build("false")), service).newMergePolicy().getUseCompoundFile(), equalTo(false));
|
|
||||||
assertThat(new TieredMergePolicyProvider(createStore(build("false")), service).newMergePolicy().getNoCFSRatio(), equalTo(0.0));
|
assertThat(new TieredMergePolicyProvider(createStore(build("false")), service).newMergePolicy().getNoCFSRatio(), equalTo(0.0));
|
||||||
assertThat(new TieredMergePolicyProvider(createStore(build(false)), service).newMergePolicy().getUseCompoundFile(), equalTo(false));
|
|
||||||
assertThat(new TieredMergePolicyProvider(createStore(build(false)), service).newMergePolicy().getNoCFSRatio(), equalTo(0.0));
|
assertThat(new TieredMergePolicyProvider(createStore(build(false)), service).newMergePolicy().getNoCFSRatio(), equalTo(0.0));
|
||||||
assertThat(new TieredMergePolicyProvider(createStore(build(0)), service).newMergePolicy().getUseCompoundFile(), equalTo(false));
|
|
||||||
assertThat(new TieredMergePolicyProvider(createStore(build(0)), service).newMergePolicy().getNoCFSRatio(), equalTo(0.0));
|
assertThat(new TieredMergePolicyProvider(createStore(build(0)), service).newMergePolicy().getNoCFSRatio(), equalTo(0.0));
|
||||||
assertThat(new TieredMergePolicyProvider(createStore(build(0.0)), service).newMergePolicy().getUseCompoundFile(), equalTo(false));
|
|
||||||
assertThat(new TieredMergePolicyProvider(createStore(build(0.0)), service).newMergePolicy().getNoCFSRatio(), equalTo(0.0));
|
assertThat(new TieredMergePolicyProvider(createStore(build(0.0)), service).newMergePolicy().getNoCFSRatio(), equalTo(0.0));
|
||||||
|
|
||||||
assertThat(new LogByteSizeMergePolicyProvider(createStore(EMPTY_SETTINGS), service).newMergePolicy().getUseCompoundFile(), equalTo(false));
|
|
||||||
assertThat(new LogByteSizeMergePolicyProvider(createStore(EMPTY_SETTINGS), service).newMergePolicy().getNoCFSRatio(), equalTo(0.0));
|
assertThat(new LogByteSizeMergePolicyProvider(createStore(EMPTY_SETTINGS), service).newMergePolicy().getNoCFSRatio(), equalTo(0.0));
|
||||||
assertThat(new LogByteSizeMergePolicyProvider(createStore(build(true)), service).newMergePolicy().getUseCompoundFile(), equalTo(true));
|
|
||||||
assertThat(new LogByteSizeMergePolicyProvider(createStore(build(true)), service).newMergePolicy().getNoCFSRatio(), equalTo(1.0));
|
assertThat(new LogByteSizeMergePolicyProvider(createStore(build(true)), service).newMergePolicy().getNoCFSRatio(), equalTo(1.0));
|
||||||
assertThat(new LogByteSizeMergePolicyProvider(createStore(build(0.5)), service).newMergePolicy().getUseCompoundFile(), equalTo(true));
|
|
||||||
assertThat(new LogByteSizeMergePolicyProvider(createStore(build(0.5)), service).newMergePolicy().getNoCFSRatio(), equalTo(0.5));
|
assertThat(new LogByteSizeMergePolicyProvider(createStore(build(0.5)), service).newMergePolicy().getNoCFSRatio(), equalTo(0.5));
|
||||||
assertThat(new LogByteSizeMergePolicyProvider(createStore(build(1.0)), service).newMergePolicy().getUseCompoundFile(), equalTo(true));
|
|
||||||
assertThat(new LogByteSizeMergePolicyProvider(createStore(build(1.0)), service).newMergePolicy().getNoCFSRatio(), equalTo(1.0));
|
assertThat(new LogByteSizeMergePolicyProvider(createStore(build(1.0)), service).newMergePolicy().getNoCFSRatio(), equalTo(1.0));
|
||||||
assertThat(new LogByteSizeMergePolicyProvider(createStore(build("true")), service).newMergePolicy().getUseCompoundFile(), equalTo(true));
|
|
||||||
assertThat(new LogByteSizeMergePolicyProvider(createStore(build("true")), service).newMergePolicy().getNoCFSRatio(), equalTo(1.0));
|
assertThat(new LogByteSizeMergePolicyProvider(createStore(build("true")), service).newMergePolicy().getNoCFSRatio(), equalTo(1.0));
|
||||||
assertThat(new LogByteSizeMergePolicyProvider(createStore(build("True")), service).newMergePolicy().getUseCompoundFile(), equalTo(true));
|
|
||||||
assertThat(new LogByteSizeMergePolicyProvider(createStore(build("True")), service).newMergePolicy().getNoCFSRatio(), equalTo(1.0));
|
assertThat(new LogByteSizeMergePolicyProvider(createStore(build("True")), service).newMergePolicy().getNoCFSRatio(), equalTo(1.0));
|
||||||
assertThat(new LogByteSizeMergePolicyProvider(createStore(build("False")), service).newMergePolicy().getUseCompoundFile(), equalTo(false));
|
|
||||||
assertThat(new LogByteSizeMergePolicyProvider(createStore(build("False")), service).newMergePolicy().getNoCFSRatio(), equalTo(0.0));
|
assertThat(new LogByteSizeMergePolicyProvider(createStore(build("False")), service).newMergePolicy().getNoCFSRatio(), equalTo(0.0));
|
||||||
assertThat(new LogByteSizeMergePolicyProvider(createStore(build("false")), service).newMergePolicy().getUseCompoundFile(), equalTo(false));
|
|
||||||
assertThat(new LogByteSizeMergePolicyProvider(createStore(build("false")), service).newMergePolicy().getNoCFSRatio(), equalTo(0.0));
|
assertThat(new LogByteSizeMergePolicyProvider(createStore(build("false")), service).newMergePolicy().getNoCFSRatio(), equalTo(0.0));
|
||||||
assertThat(new LogByteSizeMergePolicyProvider(createStore(build(false)), service).newMergePolicy().getUseCompoundFile(), equalTo(false));
|
|
||||||
assertThat(new LogByteSizeMergePolicyProvider(createStore(build(false)), service).newMergePolicy().getNoCFSRatio(), equalTo(0.0));
|
assertThat(new LogByteSizeMergePolicyProvider(createStore(build(false)), service).newMergePolicy().getNoCFSRatio(), equalTo(0.0));
|
||||||
assertThat(new LogByteSizeMergePolicyProvider(createStore(build(0)), service).newMergePolicy().getUseCompoundFile(), equalTo(false));
|
|
||||||
assertThat(new LogByteSizeMergePolicyProvider(createStore(build(0)), service).newMergePolicy().getNoCFSRatio(), equalTo(0.0));
|
assertThat(new LogByteSizeMergePolicyProvider(createStore(build(0)), service).newMergePolicy().getNoCFSRatio(), equalTo(0.0));
|
||||||
assertThat(new LogByteSizeMergePolicyProvider(createStore(build(0.0)), service).newMergePolicy().getUseCompoundFile(), equalTo(false));
|
|
||||||
assertThat(new LogByteSizeMergePolicyProvider(createStore(build(0.0)), service).newMergePolicy().getNoCFSRatio(), equalTo(0.0));
|
assertThat(new LogByteSizeMergePolicyProvider(createStore(build(0.0)), service).newMergePolicy().getNoCFSRatio(), equalTo(0.0));
|
||||||
|
|
||||||
assertThat(new LogDocMergePolicyProvider(createStore(EMPTY_SETTINGS), service).newMergePolicy().getUseCompoundFile(), equalTo(false));
|
|
||||||
assertThat(new LogDocMergePolicyProvider(createStore(EMPTY_SETTINGS), service).newMergePolicy().getNoCFSRatio(), equalTo(0.0));
|
assertThat(new LogDocMergePolicyProvider(createStore(EMPTY_SETTINGS), service).newMergePolicy().getNoCFSRatio(), equalTo(0.0));
|
||||||
assertThat(new LogDocMergePolicyProvider(createStore(build(true)), service).newMergePolicy().getUseCompoundFile(), equalTo(true));
|
|
||||||
assertThat(new LogDocMergePolicyProvider(createStore(build(true)), service).newMergePolicy().getNoCFSRatio(), equalTo(1.0));
|
assertThat(new LogDocMergePolicyProvider(createStore(build(true)), service).newMergePolicy().getNoCFSRatio(), equalTo(1.0));
|
||||||
assertThat(new LogDocMergePolicyProvider(createStore(build(0.5)), service).newMergePolicy().getUseCompoundFile(), equalTo(true));
|
|
||||||
assertThat(new LogDocMergePolicyProvider(createStore(build(0.5)), service).newMergePolicy().getNoCFSRatio(), equalTo(0.5));
|
assertThat(new LogDocMergePolicyProvider(createStore(build(0.5)), service).newMergePolicy().getNoCFSRatio(), equalTo(0.5));
|
||||||
assertThat(new LogDocMergePolicyProvider(createStore(build(1.0)), service).newMergePolicy().getUseCompoundFile(), equalTo(true));
|
|
||||||
assertThat(new LogDocMergePolicyProvider(createStore(build(1.0)), service).newMergePolicy().getNoCFSRatio(), equalTo(1.0));
|
assertThat(new LogDocMergePolicyProvider(createStore(build(1.0)), service).newMergePolicy().getNoCFSRatio(), equalTo(1.0));
|
||||||
assertThat(new LogDocMergePolicyProvider(createStore(build("true")), service).newMergePolicy().getUseCompoundFile(), equalTo(true));
|
|
||||||
assertThat(new LogDocMergePolicyProvider(createStore(build("true")), service).newMergePolicy().getNoCFSRatio(), equalTo(1.0));
|
assertThat(new LogDocMergePolicyProvider(createStore(build("true")), service).newMergePolicy().getNoCFSRatio(), equalTo(1.0));
|
||||||
assertThat(new LogDocMergePolicyProvider(createStore(build("True")), service).newMergePolicy().getUseCompoundFile(), equalTo(true));
|
|
||||||
assertThat(new LogDocMergePolicyProvider(createStore(build("True")), service).newMergePolicy().getNoCFSRatio(), equalTo(1.0));
|
assertThat(new LogDocMergePolicyProvider(createStore(build("True")), service).newMergePolicy().getNoCFSRatio(), equalTo(1.0));
|
||||||
assertThat(new LogDocMergePolicyProvider(createStore(build("False")), service).newMergePolicy().getUseCompoundFile(), equalTo(false));
|
|
||||||
assertThat(new LogDocMergePolicyProvider(createStore(build("False")), service).newMergePolicy().getNoCFSRatio(), equalTo(0.0));
|
assertThat(new LogDocMergePolicyProvider(createStore(build("False")), service).newMergePolicy().getNoCFSRatio(), equalTo(0.0));
|
||||||
assertThat(new LogDocMergePolicyProvider(createStore(build("false")), service).newMergePolicy().getUseCompoundFile(), equalTo(false));
|
|
||||||
assertThat(new LogDocMergePolicyProvider(createStore(build("false")), service).newMergePolicy().getNoCFSRatio(), equalTo(0.0));
|
assertThat(new LogDocMergePolicyProvider(createStore(build("false")), service).newMergePolicy().getNoCFSRatio(), equalTo(0.0));
|
||||||
assertThat(new LogDocMergePolicyProvider(createStore(build(false)), service).newMergePolicy().getUseCompoundFile(), equalTo(false));
|
|
||||||
assertThat(new LogDocMergePolicyProvider(createStore(build(false)), service).newMergePolicy().getNoCFSRatio(), equalTo(0.0));
|
assertThat(new LogDocMergePolicyProvider(createStore(build(false)), service).newMergePolicy().getNoCFSRatio(), equalTo(0.0));
|
||||||
assertThat(new LogDocMergePolicyProvider(createStore(build(0)), service).newMergePolicy().getUseCompoundFile(), equalTo(false));
|
|
||||||
assertThat(new LogDocMergePolicyProvider(createStore(build(0)), service).newMergePolicy().getNoCFSRatio(), equalTo(0.0));
|
assertThat(new LogDocMergePolicyProvider(createStore(build(0)), service).newMergePolicy().getNoCFSRatio(), equalTo(0.0));
|
||||||
assertThat(new LogDocMergePolicyProvider(createStore(build(0.0)), service).newMergePolicy().getUseCompoundFile(), equalTo(false));
|
|
||||||
assertThat(new LogDocMergePolicyProvider(createStore(build(0.0)), service).newMergePolicy().getNoCFSRatio(), equalTo(0.0));
|
assertThat(new LogDocMergePolicyProvider(createStore(build(0.0)), service).newMergePolicy().getNoCFSRatio(), equalTo(0.0));
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -150,57 +117,45 @@ public class MergePolicySettingsTest {
|
||||||
{
|
{
|
||||||
IndexSettingsService service = new IndexSettingsService(new Index("test"), EMPTY_SETTINGS);
|
IndexSettingsService service = new IndexSettingsService(new Index("test"), EMPTY_SETTINGS);
|
||||||
TieredMergePolicyProvider mp = new TieredMergePolicyProvider(createStore(EMPTY_SETTINGS), service);
|
TieredMergePolicyProvider mp = new TieredMergePolicyProvider(createStore(EMPTY_SETTINGS), service);
|
||||||
assertThat(mp.newMergePolicy().getUseCompoundFile(), equalTo(false));
|
|
||||||
assertThat(mp.newMergePolicy().getNoCFSRatio(), equalTo(0.0));
|
assertThat(mp.newMergePolicy().getNoCFSRatio(), equalTo(0.0));
|
||||||
|
|
||||||
service.refreshSettings(build(1.0));
|
service.refreshSettings(build(1.0));
|
||||||
assertThat(mp.newMergePolicy().getUseCompoundFile(), equalTo(true));
|
|
||||||
assertThat(mp.newMergePolicy().getNoCFSRatio(), equalTo(1.0));
|
assertThat(mp.newMergePolicy().getNoCFSRatio(), equalTo(1.0));
|
||||||
|
|
||||||
service.refreshSettings(build(0.1));
|
service.refreshSettings(build(0.1));
|
||||||
assertThat(mp.newMergePolicy().getUseCompoundFile(), equalTo(true));
|
|
||||||
assertThat(mp.newMergePolicy().getNoCFSRatio(), equalTo(0.1));
|
assertThat(mp.newMergePolicy().getNoCFSRatio(), equalTo(0.1));
|
||||||
|
|
||||||
service.refreshSettings(build(0.0));
|
service.refreshSettings(build(0.0));
|
||||||
assertThat(mp.newMergePolicy().getUseCompoundFile(), equalTo(false));
|
|
||||||
assertThat(mp.newMergePolicy().getNoCFSRatio(), equalTo(0.0));
|
assertThat(mp.newMergePolicy().getNoCFSRatio(), equalTo(0.0));
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
IndexSettingsService service = new IndexSettingsService(new Index("test"), EMPTY_SETTINGS);
|
IndexSettingsService service = new IndexSettingsService(new Index("test"), EMPTY_SETTINGS);
|
||||||
LogByteSizeMergePolicyProvider mp = new LogByteSizeMergePolicyProvider(createStore(EMPTY_SETTINGS), service);
|
LogByteSizeMergePolicyProvider mp = new LogByteSizeMergePolicyProvider(createStore(EMPTY_SETTINGS), service);
|
||||||
assertThat(mp.newMergePolicy().getUseCompoundFile(), equalTo(false));
|
|
||||||
assertThat(mp.newMergePolicy().getNoCFSRatio(), equalTo(0.0));
|
assertThat(mp.newMergePolicy().getNoCFSRatio(), equalTo(0.0));
|
||||||
|
|
||||||
service.refreshSettings(build(1.0));
|
service.refreshSettings(build(1.0));
|
||||||
assertThat(mp.newMergePolicy().getUseCompoundFile(), equalTo(true));
|
|
||||||
assertThat(mp.newMergePolicy().getNoCFSRatio(), equalTo(1.0));
|
assertThat(mp.newMergePolicy().getNoCFSRatio(), equalTo(1.0));
|
||||||
|
|
||||||
service.refreshSettings(build(0.1));
|
service.refreshSettings(build(0.1));
|
||||||
assertThat(mp.newMergePolicy().getUseCompoundFile(), equalTo(true));
|
|
||||||
assertThat(mp.newMergePolicy().getNoCFSRatio(), equalTo(0.1));
|
assertThat(mp.newMergePolicy().getNoCFSRatio(), equalTo(0.1));
|
||||||
|
|
||||||
service.refreshSettings(build(0.0));
|
service.refreshSettings(build(0.0));
|
||||||
assertThat(mp.newMergePolicy().getUseCompoundFile(), equalTo(false));
|
|
||||||
assertThat(mp.newMergePolicy().getNoCFSRatio(), equalTo(0.0));
|
assertThat(mp.newMergePolicy().getNoCFSRatio(), equalTo(0.0));
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
IndexSettingsService service = new IndexSettingsService(new Index("test"), EMPTY_SETTINGS);
|
IndexSettingsService service = new IndexSettingsService(new Index("test"), EMPTY_SETTINGS);
|
||||||
LogDocMergePolicyProvider mp = new LogDocMergePolicyProvider(createStore(EMPTY_SETTINGS), service);
|
LogDocMergePolicyProvider mp = new LogDocMergePolicyProvider(createStore(EMPTY_SETTINGS), service);
|
||||||
assertThat(mp.newMergePolicy().getUseCompoundFile(), equalTo(false));
|
|
||||||
assertThat(mp.newMergePolicy().getNoCFSRatio(), equalTo(0.0));
|
assertThat(mp.newMergePolicy().getNoCFSRatio(), equalTo(0.0));
|
||||||
|
|
||||||
service.refreshSettings(build(1.0));
|
service.refreshSettings(build(1.0));
|
||||||
assertThat(mp.newMergePolicy().getUseCompoundFile(), equalTo(true));
|
|
||||||
assertThat(mp.newMergePolicy().getNoCFSRatio(), equalTo(1.0));
|
assertThat(mp.newMergePolicy().getNoCFSRatio(), equalTo(1.0));
|
||||||
|
|
||||||
service.refreshSettings(build(0.1));
|
service.refreshSettings(build(0.1));
|
||||||
assertThat(mp.newMergePolicy().getUseCompoundFile(), equalTo(true));
|
|
||||||
assertThat(mp.newMergePolicy().getNoCFSRatio(), equalTo(0.1));
|
assertThat(mp.newMergePolicy().getNoCFSRatio(), equalTo(0.1));
|
||||||
|
|
||||||
service.refreshSettings(build(0.0));
|
service.refreshSettings(build(0.0));
|
||||||
assertThat(mp.newMergePolicy().getUseCompoundFile(), equalTo(false));
|
|
||||||
assertThat(mp.newMergePolicy().getNoCFSRatio(), equalTo(0.0));
|
assertThat(mp.newMergePolicy().getNoCFSRatio(), equalTo(0.0));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue