parent 5df9c048fe
commit 223dab8921

pom.xml (2 changes)
@@ -31,7 +31,7 @@
     </parent>

     <properties>
-        <lucene.version>4.9.0</lucene.version>
+        <lucene.version>4.10.0</lucene.version>
         <tests.jvms>auto</tests.jvms>
         <tests.shuffle>true</tests.shuffle>
         <tests.output>onerror</tests.output>
@@ -1,70 +0,0 @@
/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.lucene.analysis;

import java.io.Reader;

/**
 * A simple analyzer wrapper, that doesn't allow to wrap components or reader. By disallowing
 * it, it means that the thread local resources will be delegated to the wrapped analyzer, and not
 * also be allocated on this analyzer.
 *
 * This solves the problem of per field analyzer wrapper, where it also maintains a thread local
 * per field token stream components, while it can safely delegate those and not also hold these
 * data structures, which can become expensive memory wise.
 */
public abstract class SimpleAnalyzerWrapper extends AnalyzerWrapper {

    static {
        assert org.elasticsearch.Version.CURRENT.luceneVersion == org.apache.lucene.util.Version.LUCENE_4_9: "Remove this code once we upgrade to Lucene 4.10 (LUCENE-5803)";
    }

    public SimpleAnalyzerWrapper() {
        super(new DelegatingReuseStrategy());
        ((DelegatingReuseStrategy) getReuseStrategy()).wrapper = this;
    }

    @Override
    protected final TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
        return super.wrapComponents(fieldName, components);
    }

    @Override
    protected final Reader wrapReader(String fieldName, Reader reader) {
        return super.wrapReader(fieldName, reader);
    }

    private static class DelegatingReuseStrategy extends ReuseStrategy {

        AnalyzerWrapper wrapper;

        @Override
        public TokenStreamComponents getReusableComponents(Analyzer analyzer, String fieldName) {
            Analyzer wrappedAnalyzer = wrapper.getWrappedAnalyzer(fieldName);
            return wrappedAnalyzer.getReuseStrategy().getReusableComponents(wrappedAnalyzer, fieldName);
        }

        @Override
        public void setReusableComponents(Analyzer analyzer, String fieldName, TokenStreamComponents components) {
            Analyzer wrappedAnalyzer = wrapper.getWrappedAnalyzer(fieldName);
            wrappedAnalyzer.getReuseStrategy().setReusableComponents(wrappedAnalyzer, fieldName, components);
        }
    }
}
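The deleted wrapper above is abstract: the only hook a subclass supplies is getWrappedAnalyzer(String), while wrapComponents and wrapReader stay final pass-throughs, so TokenStreamComponents reuse lives entirely in the delegate analyzers. A minimal sketch of such a subclass, assuming the Lucene 4.x analysis API (the class and field names below are hypothetical, not part of this commit):

import java.util.Map;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.SimpleAnalyzerWrapper;

/** Routes each field to its own analyzer while reusing the delegates' cached components. */
public final class PerFieldDelegatingAnalyzer extends SimpleAnalyzerWrapper {

    private final Analyzer defaultAnalyzer;
    private final Map<String, Analyzer> fieldAnalyzers;

    public PerFieldDelegatingAnalyzer(Analyzer defaultAnalyzer, Map<String, Analyzer> fieldAnalyzers) {
        this.defaultAnalyzer = defaultAnalyzer;
        this.fieldAnalyzers = fieldAnalyzers;
    }

    @Override
    protected Analyzer getWrappedAnalyzer(String fieldName) {
        // Fall back to the default analyzer for fields without an explicit mapping.
        Analyzer analyzer = fieldAnalyzers.get(fieldName);
        return analyzer == null ? defaultAnalyzer : analyzer;
    }
}

The static assert in the deleted file records the intent of this removal: once on Lucene 4.10 (LUCENE-5803) the workaround is no longer needed.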
@@ -1,128 +0,0 @@
package org.apache.lucene.expressions;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.util.HashMap;
import java.util.Map;

import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.queries.function.valuesource.DoubleFieldSource;
import org.apache.lucene.queries.function.valuesource.FloatFieldSource;
import org.apache.lucene.queries.function.valuesource.IntFieldSource;
import org.apache.lucene.queries.function.valuesource.LongFieldSource;
import org.apache.lucene.search.SortField;

/**
 * Simple class that binds expression variable names to {@link SortField}s
 * or other {@link Expression}s.
 * <p>
 * Example usage:
 * <pre class="prettyprint">
 *   XSimpleBindings bindings = new XSimpleBindings();
 *   // document's text relevance score
 *   bindings.add(new SortField("_score", SortField.Type.SCORE));
 *   // integer NumericDocValues field (or from FieldCache)
 *   bindings.add(new SortField("popularity", SortField.Type.INT));
 *   // another expression
 *   bindings.add("recency", myRecencyExpression);
 *
 *   // create a sort field in reverse order
 *   Sort sort = new Sort(expr.getSortField(bindings, true));
 * </pre>
 *
 * @lucene.experimental
 */
public final class XSimpleBindings extends Bindings {

  static {
    assert org.elasticsearch.Version.CURRENT.luceneVersion == org.apache.lucene.util.Version.LUCENE_4_9: "Remove this code once we upgrade to Lucene 4.10 (LUCENE-5806)";
  }

  final Map<String,Object> map = new HashMap<>();

  /** Creates a new empty Bindings */
  public XSimpleBindings() {}

  /**
   * Adds a SortField to the bindings.
   * <p>
   * This can be used to reference a DocValuesField, a field from
   * FieldCache, the document's score, etc.
   */
  public void add(SortField sortField) {
    map.put(sortField.getField(), sortField);
  }

  /**
   * Bind a {@link ValueSource} directly to the given name.
   */
  public void add(String name, ValueSource source) { map.put(name, source); }

  /**
   * Adds an Expression to the bindings.
   * <p>
   * This can be used to reference expressions from other expressions.
   */
  public void add(String name, Expression expression) {
    map.put(name, expression);
  }

  @Override
  public ValueSource getValueSource(String name) {
    Object o = map.get(name);
    if (o == null) {
      throw new IllegalArgumentException("Invalid reference '" + name + "'");
    } else if (o instanceof Expression) {
      return ((Expression)o).getValueSource(this);
    } else if (o instanceof ValueSource) {
      return ((ValueSource)o);
    }
    SortField field = (SortField) o;
    switch(field.getType()) {
      case INT:
        return new IntFieldSource(field.getField());
      case LONG:
        return new LongFieldSource(field.getField());
      case FLOAT:
        return new FloatFieldSource(field.getField());
      case DOUBLE:
        return new DoubleFieldSource(field.getField());
      case SCORE:
        return getScoreValueSource();
      default:
        throw new UnsupportedOperationException();
    }
  }

  /**
   * Traverses the graph of bindings, checking there are no cycles or missing references
   * @throws IllegalArgumentException if the bindings is inconsistent
   */
  public void validate() {
    for (Object o : map.values()) {
      if (o instanceof Expression) {
        Expression expr = (Expression) o;
        try {
          expr.getValueSource(this);
        } catch (StackOverflowError e) {
          throw new IllegalArgumentException("Recursion Error: Cycle detected originating in (" + expr.sourceText + ")");
        }
      }
    }
  }
}
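validate() above relies on Expression.getValueSource(Bindings) resolving, directly or indirectly, every variable the expression references back through the same bindings, so a reference cycle recurses until the StackOverflowError is caught and rethrown as IllegalArgumentException. A short hypothetical sketch of that failure mode (illustrative names, using the stock JavascriptCompiler; not part of this commit):

import java.text.ParseException;

import org.apache.lucene.expressions.XSimpleBindings;
import org.apache.lucene.expressions.js.JavascriptCompiler;

public final class BindingsValidationExample {
    public static void main(String[] args) throws ParseException {
        XSimpleBindings bindings = new XSimpleBindings();
        // "a" refers to "b" and "b" refers back to "a": a cycle.
        bindings.add("a", JavascriptCompiler.compile("b + 1"));
        bindings.add("b", JavascriptCompiler.compile("a + 1"));
        // Expected to throw IllegalArgumentException: "Recursion Error: Cycle detected originating in (...)".
        bindings.validate();
    }
}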
@@ -1,614 +0,0 @@
|
|||
package org.apache.lucene.expressions.js;
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
import java.lang.reflect.Constructor;
|
||||
import java.lang.reflect.InvocationTargetException;
|
||||
import java.lang.reflect.Method;
|
||||
import java.lang.reflect.Modifier;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.text.ParseException;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.Map;
|
||||
import java.util.Properties;
|
||||
|
||||
import org.antlr.runtime.ANTLRStringStream;
|
||||
import org.antlr.runtime.CharStream;
|
||||
import org.antlr.runtime.CommonTokenStream;
|
||||
import org.antlr.runtime.RecognitionException;
|
||||
import org.antlr.runtime.tree.Tree;
|
||||
import org.apache.lucene.expressions.Expression;
|
||||
import org.apache.lucene.queries.function.FunctionValues;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.objectweb.asm.ClassWriter;
|
||||
import org.objectweb.asm.Label;
|
||||
import org.objectweb.asm.Opcodes;
|
||||
import org.objectweb.asm.Type;
|
||||
import org.objectweb.asm.commons.GeneratorAdapter;
|
||||
|
||||
/**
|
||||
* An expression compiler for javascript expressions.
|
||||
* <p>
|
||||
* Example:
|
||||
* <pre class="prettyprint">
|
||||
* Expression foo = XJavascriptCompiler.compile("((0.3*popularity)/10.0)+(0.7*score)");
|
||||
* </pre>
|
||||
* <p>
|
||||
* See the {@link org.apache.lucene.expressions.js package documentation} for
|
||||
* the supported syntax and default functions.
|
||||
* <p>
|
||||
* You can compile with an alternate set of functions via {@link #compile(String, Map, ClassLoader)}.
|
||||
* For example:
|
||||
* <pre class="prettyprint">
|
||||
* Map<String,Method> functions = new HashMap<>();
|
||||
* // add all the default functions
|
||||
* functions.putAll(XJavascriptCompiler.DEFAULT_FUNCTIONS);
|
||||
* // add cbrt()
|
||||
* functions.put("cbrt", Math.class.getMethod("cbrt", double.class));
|
||||
* // call compile with customized function map
|
||||
* Expression foo = XJavascriptCompiler.compile("cbrt(score)+ln(popularity)",
|
||||
* functions,
|
||||
* getClass().getClassLoader());
|
||||
* </pre>
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class XJavascriptCompiler {
|
||||
|
||||
static {
|
||||
assert org.elasticsearch.Version.CURRENT.luceneVersion == org.apache.lucene.util.Version.LUCENE_4_9: "Remove this code once we upgrade to Lucene 4.10 (LUCENE-5806)";
|
||||
}
|
||||
|
||||
static final class Loader extends ClassLoader {
|
||||
Loader(ClassLoader parent) {
|
||||
super(parent);
|
||||
}
|
||||
|
||||
public Class<? extends Expression> define(String className, byte[] bytecode) {
|
||||
return defineClass(className, bytecode, 0, bytecode.length).asSubclass(Expression.class);
|
||||
}
|
||||
}
|
||||
|
||||
private static final int CLASSFILE_VERSION = Opcodes.V1_7;
|
||||
|
||||
// We use the same class name for all generated classes as they all have their own class loader.
|
||||
// The source code is displayed as "source file name" in stack trace.
|
||||
private static final String COMPILED_EXPRESSION_CLASS = XJavascriptCompiler.class.getName() + "$CompiledExpression";
|
||||
private static final String COMPILED_EXPRESSION_INTERNAL = COMPILED_EXPRESSION_CLASS.replace('.', '/');
|
||||
|
||||
private static final Type EXPRESSION_TYPE = Type.getType(Expression.class);
|
||||
private static final Type FUNCTION_VALUES_TYPE = Type.getType(FunctionValues.class);
|
||||
|
||||
private static final org.objectweb.asm.commons.Method
|
||||
EXPRESSION_CTOR = getMethod("void <init>(String, String[])"),
|
||||
EVALUATE_METHOD = getMethod("double evaluate(int, " + FunctionValues.class.getName() + "[])"),
|
||||
DOUBLE_VAL_METHOD = getMethod("double doubleVal(int)");
|
||||
|
||||
// to work around import clash:
|
||||
private static org.objectweb.asm.commons.Method getMethod(String method) {
|
||||
return org.objectweb.asm.commons.Method.getMethod(method);
|
||||
}
|
||||
|
||||
// This maximum length is theoretically 65535 bytes, but as its CESU-8 encoded we dont know how large it is in bytes, so be safe
|
||||
// rcmuir: "If your ranking function is that large you need to check yourself into a mental institution!"
|
||||
private static final int MAX_SOURCE_LENGTH = 16384;
|
||||
|
||||
private final String sourceText;
|
||||
private final Map<String, Integer> externalsMap = new LinkedHashMap<>();
|
||||
private final ClassWriter classWriter = new ClassWriter(ClassWriter.COMPUTE_FRAMES | ClassWriter.COMPUTE_MAXS);
|
||||
private GeneratorAdapter gen;
|
||||
|
||||
private final Map<String,Method> functions;
|
||||
|
||||
/**
|
||||
* Compiles the given expression.
|
||||
*
|
||||
* @param sourceText The expression to compile
|
||||
* @return A new compiled expression
|
||||
* @throws ParseException on failure to compile
|
||||
*/
|
||||
public static Expression compile(String sourceText) throws ParseException {
|
||||
return new XJavascriptCompiler(sourceText).compileExpression(XJavascriptCompiler.class.getClassLoader());
|
||||
}
|
||||
|
||||
/**
|
||||
* Compiles the given expression with the supplied custom functions.
|
||||
* <p>
|
||||
* Functions must be {@code public static}, return {@code double} and
|
||||
* can take from zero to 256 {@code double} parameters.
|
||||
*
|
||||
* @param sourceText The expression to compile
|
||||
* @param functions map of String names to functions
|
||||
* @param parent a {@code ClassLoader} that should be used as the parent of the loaded class.
|
||||
* It must contain all classes referred to by the given {@code functions}.
|
||||
* @return A new compiled expression
|
||||
* @throws ParseException on failure to compile
|
||||
*/
|
||||
public static Expression compile(String sourceText, Map<String,Method> functions, ClassLoader parent) throws ParseException {
|
||||
if (parent == null) {
|
||||
throw new NullPointerException("A parent ClassLoader must be given.");
|
||||
}
|
||||
for (Method m : functions.values()) {
|
||||
checkFunction(m, parent);
|
||||
}
|
||||
return new XJavascriptCompiler(sourceText, functions).compileExpression(parent);
|
||||
}
|
||||
|
||||
/**
|
||||
* This method is unused, it is just here to make sure that the function signatures don't change.
|
||||
* If this method fails to compile, you also have to change the byte code generator to correctly
|
||||
* use the FunctionValues class.
|
||||
*/
|
||||
@SuppressWarnings({"unused", "null"})
|
||||
private static void unusedTestCompile() {
|
||||
FunctionValues f = null;
|
||||
double ret = f.doubleVal(2);
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs a compiler for expressions.
|
||||
* @param sourceText The expression to compile
|
||||
*/
|
||||
private XJavascriptCompiler(String sourceText) {
|
||||
this(sourceText, DEFAULT_FUNCTIONS);
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs a compiler for expressions with specific set of functions
|
||||
* @param sourceText The expression to compile
|
||||
*/
|
||||
private XJavascriptCompiler(String sourceText, Map<String,Method> functions) {
|
||||
if (sourceText == null) {
|
||||
throw new NullPointerException();
|
||||
}
|
||||
this.sourceText = sourceText;
|
||||
this.functions = functions;
|
||||
}
|
||||
|
||||
/**
|
||||
* Compiles the given expression with the specified parent classloader
|
||||
*
|
||||
* @return A new compiled expression
|
||||
* @throws ParseException on failure to compile
|
||||
*/
|
||||
private Expression compileExpression(ClassLoader parent) throws ParseException {
|
||||
try {
|
||||
Tree antlrTree = getAntlrComputedExpressionTree();
|
||||
|
||||
beginCompile();
|
||||
recursiveCompile(antlrTree, Type.DOUBLE_TYPE);
|
||||
endCompile();
|
||||
|
||||
Class<? extends Expression> evaluatorClass = new Loader(parent)
|
||||
.define(COMPILED_EXPRESSION_CLASS, classWriter.toByteArray());
|
||||
Constructor<? extends Expression> constructor = evaluatorClass.getConstructor(String.class, String[].class);
|
||||
return constructor.newInstance(sourceText, externalsMap.keySet().toArray(new String[externalsMap.size()]));
|
||||
} catch (InstantiationException | IllegalAccessException | NoSuchMethodException | InvocationTargetException exception) {
|
||||
throw new IllegalStateException("An internal error occurred attempting to compile the expression (" + sourceText + ").", exception);
|
||||
}
|
||||
}
|
||||
|
||||
private void beginCompile() {
|
||||
classWriter.visit(CLASSFILE_VERSION,
|
||||
Opcodes.ACC_PUBLIC | Opcodes.ACC_SUPER | Opcodes.ACC_FINAL | Opcodes.ACC_SYNTHETIC,
|
||||
COMPILED_EXPRESSION_INTERNAL,
|
||||
null, EXPRESSION_TYPE.getInternalName(), null);
|
||||
String clippedSourceText = (sourceText.length() <= MAX_SOURCE_LENGTH) ?
|
||||
sourceText : (sourceText.substring(0, MAX_SOURCE_LENGTH - 3) + "...");
|
||||
classWriter.visitSource(clippedSourceText, null);
|
||||
|
||||
GeneratorAdapter constructor = new GeneratorAdapter(Opcodes.ACC_PUBLIC | Opcodes.ACC_SYNTHETIC,
|
||||
EXPRESSION_CTOR, null, null, classWriter);
|
||||
constructor.loadThis();
|
||||
constructor.loadArgs();
|
||||
constructor.invokeConstructor(EXPRESSION_TYPE, EXPRESSION_CTOR);
|
||||
constructor.returnValue();
|
||||
constructor.endMethod();
|
||||
|
||||
gen = new GeneratorAdapter(Opcodes.ACC_PUBLIC | Opcodes.ACC_SYNTHETIC,
|
||||
EVALUATE_METHOD, null, null, classWriter);
|
||||
}
|
||||
|
||||
private void recursiveCompile(Tree current, Type expected) {
|
||||
int type = current.getType();
|
||||
String text = current.getText();
|
||||
|
||||
switch (type) {
|
||||
case XJavascriptParser.AT_CALL:
|
||||
Tree identifier = current.getChild(0);
|
||||
String call = identifier.getText();
|
||||
int arguments = current.getChildCount() - 1;
|
||||
|
||||
Method method = functions.get(call);
|
||||
if (method == null) {
|
||||
throw new IllegalArgumentException("Unrecognized method call (" + call + ").");
|
||||
}
|
||||
|
||||
int arity = method.getParameterTypes().length;
|
||||
if (arguments != arity) {
|
||||
throw new IllegalArgumentException("Expected (" + arity + ") arguments for method call (" +
|
||||
call + "), but found (" + arguments + ").");
|
||||
}
|
||||
|
||||
for (int argument = 1; argument <= arguments; ++argument) {
|
||||
recursiveCompile(current.getChild(argument), Type.DOUBLE_TYPE);
|
||||
}
|
||||
|
||||
gen.invokeStatic(Type.getType(method.getDeclaringClass()),
|
||||
org.objectweb.asm.commons.Method.getMethod(method));
|
||||
|
||||
gen.cast(Type.DOUBLE_TYPE, expected);
|
||||
break;
|
||||
case XJavascriptParser.VARIABLE:
|
||||
int index;
|
||||
|
||||
// normalize quotes
|
||||
text = normalizeQuotes(text);
|
||||
|
||||
|
||||
if (externalsMap.containsKey(text)) {
|
||||
index = externalsMap.get(text);
|
||||
} else {
|
||||
index = externalsMap.size();
|
||||
externalsMap.put(text, index);
|
||||
}
|
||||
|
||||
gen.loadArg(1);
|
||||
gen.push(index);
|
||||
gen.arrayLoad(FUNCTION_VALUES_TYPE);
|
||||
gen.loadArg(0);
|
||||
gen.invokeVirtual(FUNCTION_VALUES_TYPE, DOUBLE_VAL_METHOD);
|
||||
gen.cast(Type.DOUBLE_TYPE, expected);
|
||||
break;
|
||||
case XJavascriptParser.HEX:
|
||||
pushLong(expected, Long.parseLong(text.substring(2), 16));
|
||||
break;
|
||||
case XJavascriptParser.OCTAL:
|
||||
pushLong(expected, Long.parseLong(text.substring(1), 8));
|
||||
break;
|
||||
case XJavascriptParser.DECIMAL:
|
||||
gen.push(Double.parseDouble(text));
|
||||
gen.cast(Type.DOUBLE_TYPE, expected);
|
||||
break;
|
||||
case XJavascriptParser.AT_NEGATE:
|
||||
recursiveCompile(current.getChild(0), Type.DOUBLE_TYPE);
|
||||
gen.visitInsn(Opcodes.DNEG);
|
||||
gen.cast(Type.DOUBLE_TYPE, expected);
|
||||
break;
|
||||
case XJavascriptParser.AT_ADD:
|
||||
pushArith(Opcodes.DADD, current, expected);
|
||||
break;
|
||||
case XJavascriptParser.AT_SUBTRACT:
|
||||
pushArith(Opcodes.DSUB, current, expected);
|
||||
break;
|
||||
case XJavascriptParser.AT_MULTIPLY:
|
||||
pushArith(Opcodes.DMUL, current, expected);
|
||||
break;
|
||||
case XJavascriptParser.AT_DIVIDE:
|
||||
pushArith(Opcodes.DDIV, current, expected);
|
||||
break;
|
||||
case XJavascriptParser.AT_MODULO:
|
||||
pushArith(Opcodes.DREM, current, expected);
|
||||
break;
|
||||
case XJavascriptParser.AT_BIT_SHL:
|
||||
pushShift(Opcodes.LSHL, current, expected);
|
||||
break;
|
||||
case XJavascriptParser.AT_BIT_SHR:
|
||||
pushShift(Opcodes.LSHR, current, expected);
|
||||
break;
|
||||
case XJavascriptParser.AT_BIT_SHU:
|
||||
pushShift(Opcodes.LUSHR, current, expected);
|
||||
break;
|
||||
case XJavascriptParser.AT_BIT_AND:
|
||||
pushBitwise(Opcodes.LAND, current, expected);
|
||||
break;
|
||||
case XJavascriptParser.AT_BIT_OR:
|
||||
pushBitwise(Opcodes.LOR, current, expected);
|
||||
break;
|
||||
case XJavascriptParser.AT_BIT_XOR:
|
||||
pushBitwise(Opcodes.LXOR, current, expected);
|
||||
break;
|
||||
case XJavascriptParser.AT_BIT_NOT:
|
||||
recursiveCompile(current.getChild(0), Type.LONG_TYPE);
|
||||
gen.push(-1L);
|
||||
gen.visitInsn(Opcodes.LXOR);
|
||||
gen.cast(Type.LONG_TYPE, expected);
|
||||
break;
|
||||
case XJavascriptParser.AT_COMP_EQ:
|
||||
pushCond(GeneratorAdapter.EQ, current, expected);
|
||||
break;
|
||||
case XJavascriptParser.AT_COMP_NEQ:
|
||||
pushCond(GeneratorAdapter.NE, current, expected);
|
||||
break;
|
||||
case XJavascriptParser.AT_COMP_LT:
|
||||
pushCond(GeneratorAdapter.LT, current, expected);
|
||||
break;
|
||||
case XJavascriptParser.AT_COMP_GT:
|
||||
pushCond(GeneratorAdapter.GT, current, expected);
|
||||
break;
|
||||
case XJavascriptParser.AT_COMP_LTE:
|
||||
pushCond(GeneratorAdapter.LE, current, expected);
|
||||
break;
|
||||
case XJavascriptParser.AT_COMP_GTE:
|
||||
pushCond(GeneratorAdapter.GE, current, expected);
|
||||
break;
|
||||
case XJavascriptParser.AT_BOOL_NOT:
|
||||
Label labelNotTrue = new Label();
|
||||
Label labelNotReturn = new Label();
|
||||
|
||||
recursiveCompile(current.getChild(0), Type.INT_TYPE);
|
||||
gen.visitJumpInsn(Opcodes.IFEQ, labelNotTrue);
|
||||
pushBoolean(expected, false);
|
||||
gen.goTo(labelNotReturn);
|
||||
gen.visitLabel(labelNotTrue);
|
||||
pushBoolean(expected, true);
|
||||
gen.visitLabel(labelNotReturn);
|
||||
break;
|
||||
case XJavascriptParser.AT_BOOL_AND:
|
||||
Label andFalse = new Label();
|
||||
Label andEnd = new Label();
|
||||
|
||||
recursiveCompile(current.getChild(0), Type.INT_TYPE);
|
||||
gen.visitJumpInsn(Opcodes.IFEQ, andFalse);
|
||||
recursiveCompile(current.getChild(1), Type.INT_TYPE);
|
||||
gen.visitJumpInsn(Opcodes.IFEQ, andFalse);
|
||||
pushBoolean(expected, true);
|
||||
gen.goTo(andEnd);
|
||||
gen.visitLabel(andFalse);
|
||||
pushBoolean(expected, false);
|
||||
gen.visitLabel(andEnd);
|
||||
break;
|
||||
case XJavascriptParser.AT_BOOL_OR:
|
||||
Label orTrue = new Label();
|
||||
Label orEnd = new Label();
|
||||
|
||||
recursiveCompile(current.getChild(0), Type.INT_TYPE);
|
||||
gen.visitJumpInsn(Opcodes.IFNE, orTrue);
|
||||
recursiveCompile(current.getChild(1), Type.INT_TYPE);
|
||||
gen.visitJumpInsn(Opcodes.IFNE, orTrue);
|
||||
pushBoolean(expected, false);
|
||||
gen.goTo(orEnd);
|
||||
gen.visitLabel(orTrue);
|
||||
pushBoolean(expected, true);
|
||||
gen.visitLabel(orEnd);
|
||||
break;
|
||||
case XJavascriptParser.AT_COND_QUE:
|
||||
Label condFalse = new Label();
|
||||
Label condEnd = new Label();
|
||||
|
||||
recursiveCompile(current.getChild(0), Type.INT_TYPE);
|
||||
gen.visitJumpInsn(Opcodes.IFEQ, condFalse);
|
||||
recursiveCompile(current.getChild(1), expected);
|
||||
gen.goTo(condEnd);
|
||||
gen.visitLabel(condFalse);
|
||||
recursiveCompile(current.getChild(2), expected);
|
||||
gen.visitLabel(condEnd);
|
||||
break;
|
||||
default:
|
||||
throw new IllegalStateException("Unknown operation specified: (" + current.getText() + ").");
|
||||
}
|
||||
}
|
||||
|
||||
private void pushArith(int operator, Tree current, Type expected) {
|
||||
pushBinaryOp(operator, current, expected, Type.DOUBLE_TYPE, Type.DOUBLE_TYPE, Type.DOUBLE_TYPE);
|
||||
}
|
||||
|
||||
private void pushShift(int operator, Tree current, Type expected) {
|
||||
pushBinaryOp(operator, current, expected, Type.LONG_TYPE, Type.INT_TYPE, Type.LONG_TYPE);
|
||||
}
|
||||
|
||||
private void pushBitwise(int operator, Tree current, Type expected) {
|
||||
pushBinaryOp(operator, current, expected, Type.LONG_TYPE, Type.LONG_TYPE, Type.LONG_TYPE);
|
||||
}
|
||||
|
||||
private void pushBinaryOp(int operator, Tree current, Type expected, Type arg1, Type arg2, Type returnType) {
|
||||
recursiveCompile(current.getChild(0), arg1);
|
||||
recursiveCompile(current.getChild(1), arg2);
|
||||
gen.visitInsn(operator);
|
||||
gen.cast(returnType, expected);
|
||||
}
|
||||
|
||||
private void pushCond(int operator, Tree current, Type expected) {
|
||||
Label labelTrue = new Label();
|
||||
Label labelReturn = new Label();
|
||||
|
||||
recursiveCompile(current.getChild(0), Type.DOUBLE_TYPE);
|
||||
recursiveCompile(current.getChild(1), Type.DOUBLE_TYPE);
|
||||
|
||||
gen.ifCmp(Type.DOUBLE_TYPE, operator, labelTrue);
|
||||
pushBoolean(expected, false);
|
||||
gen.goTo(labelReturn);
|
||||
gen.visitLabel(labelTrue);
|
||||
pushBoolean(expected, true);
|
||||
gen.visitLabel(labelReturn);
|
||||
}
|
||||
|
||||
private void pushBoolean(Type expected, boolean truth) {
|
||||
switch (expected.getSort()) {
|
||||
case Type.INT:
|
||||
gen.push(truth);
|
||||
break;
|
||||
case Type.LONG:
|
||||
gen.push(truth ? 1L : 0L);
|
||||
break;
|
||||
case Type.DOUBLE:
|
||||
gen.push(truth ? 1. : 0.);
|
||||
break;
|
||||
default:
|
||||
throw new IllegalStateException("Invalid expected type: " + expected);
|
||||
}
|
||||
}
|
||||
|
||||
private void pushLong(Type expected, long i) {
|
||||
switch (expected.getSort()) {
|
||||
case Type.INT:
|
||||
gen.push((int) i);
|
||||
break;
|
||||
case Type.LONG:
|
||||
gen.push(i);
|
||||
break;
|
||||
case Type.DOUBLE:
|
||||
gen.push((double) i);
|
||||
break;
|
||||
default:
|
||||
throw new IllegalStateException("Invalid expected type: " + expected);
|
||||
}
|
||||
}
|
||||
|
||||
private void endCompile() {
|
||||
gen.returnValue();
|
||||
gen.endMethod();
|
||||
|
||||
classWriter.visitEnd();
|
||||
}
|
||||
|
||||
private Tree getAntlrComputedExpressionTree() throws ParseException {
|
||||
CharStream input = new ANTLRStringStream(sourceText);
|
||||
XJavascriptLexer lexer = new XJavascriptLexer(input);
|
||||
CommonTokenStream tokens = new CommonTokenStream(lexer);
|
||||
XJavascriptParser parser = new XJavascriptParser(tokens);
|
||||
|
||||
try {
|
||||
return parser.expression().tree;
|
||||
|
||||
} catch (RecognitionException exception) {
|
||||
throw new IllegalArgumentException(exception);
|
||||
} catch (RuntimeException exception) {
|
||||
if (exception.getCause() instanceof ParseException) {
|
||||
throw (ParseException)exception.getCause();
|
||||
}
|
||||
throw exception;
|
||||
}
|
||||
}
|
||||
|
||||
private static String normalizeQuotes(String text) {
|
||||
StringBuilder out = new StringBuilder(text.length());
|
||||
boolean inDoubleQuotes = false;
|
||||
for (int i = 0; i < text.length(); ++i) {
|
||||
char c = text.charAt(i);
|
||||
if (c == '\\') {
|
||||
c = text.charAt(++i);
|
||||
if (c == '\\') {
|
||||
out.append('\\'); // re-escape the backslash
|
||||
}
|
||||
// no escape for double quote
|
||||
} else if (c == '\'') {
|
||||
if (inDoubleQuotes) {
|
||||
// escape in output
|
||||
out.append('\\');
|
||||
} else {
|
||||
int j = findSingleQuoteStringEnd(text, i);
|
||||
out.append(text, i, j); // copy up to end quote (leave end for append below)
|
||||
i = j;
|
||||
}
|
||||
} else if (c == '"') {
|
||||
c = '\''; // change beginning/ending doubles to singles
|
||||
inDoubleQuotes = !inDoubleQuotes;
|
||||
}
|
||||
out.append(c);
|
||||
}
|
||||
return out.toString();
|
||||
}
|
||||
|
||||
private static int findSingleQuoteStringEnd(String text, int start) {
|
||||
++start; // skip beginning
|
||||
while (text.charAt(start) != '\'') {
|
||||
if (text.charAt(start) == '\\') {
|
||||
++start; // blindly consume escape value
|
||||
}
|
||||
++start;
|
||||
}
|
||||
return start;
|
||||
}
|
||||
|
||||
/**
|
||||
* The default set of functions available to expressions.
|
||||
* <p>
|
||||
* See the {@link org.apache.lucene.expressions.js package documentation}
|
||||
* for a list.
|
||||
*/
|
||||
public static final Map<String,Method> DEFAULT_FUNCTIONS;
|
||||
static {
|
||||
Map<String,Method> map = new HashMap<>();
|
||||
try {
|
||||
final Properties props = new Properties();
|
||||
try (Reader in = IOUtils.getDecodingReader(JavascriptCompiler.class,
|
||||
JavascriptCompiler.class.getSimpleName() + ".properties", StandardCharsets.UTF_8)) {
|
||||
props.load(in);
|
||||
}
|
||||
for (final String call : props.stringPropertyNames()) {
|
||||
final String[] vals = props.getProperty(call).split(",");
|
||||
if (vals.length != 3) {
|
||||
throw new Error("Syntax error while reading Javascript functions from resource");
|
||||
}
|
||||
final Class<?> clazz = Class.forName(vals[0].trim());
|
||||
final String methodName = vals[1].trim();
|
||||
final int arity = Integer.parseInt(vals[2].trim());
|
||||
@SuppressWarnings({"rawtypes", "unchecked"}) Class[] args = new Class[arity];
|
||||
Arrays.fill(args, double.class);
|
||||
Method method = clazz.getMethod(methodName, args);
|
||||
checkFunction(method, JavascriptCompiler.class.getClassLoader());
|
||||
map.put(call, method);
|
||||
}
|
||||
} catch (NoSuchMethodException | ClassNotFoundException | IOException e) {
|
||||
throw new Error("Cannot resolve function", e);
|
||||
}
|
||||
DEFAULT_FUNCTIONS = Collections.unmodifiableMap(map);
|
||||
}
|
||||
|
||||
private static void checkFunction(Method method, ClassLoader parent) {
|
||||
// We can only call the function if the given parent class loader of our compiled class has access to the method:
|
||||
final ClassLoader functionClassloader = method.getDeclaringClass().getClassLoader();
|
||||
if (functionClassloader != null) { // it is a system class iff null!
|
||||
boolean found = false;
|
||||
while (parent != null) {
|
||||
if (parent == functionClassloader) {
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
parent = parent.getParent();
|
||||
}
|
||||
if (!found) {
|
||||
throw new IllegalArgumentException(method + " is not declared by a class which is accessible by the given parent ClassLoader.");
|
||||
}
|
||||
}
|
||||
// do some checks if the signature is "compatible":
|
||||
if (!Modifier.isStatic(method.getModifiers())) {
|
||||
throw new IllegalArgumentException(method + " is not static.");
|
||||
}
|
||||
if (!Modifier.isPublic(method.getModifiers())) {
|
||||
throw new IllegalArgumentException(method + " is not public.");
|
||||
}
|
||||
if (!Modifier.isPublic(method.getDeclaringClass().getModifiers())) {
|
||||
throw new IllegalArgumentException(method.getDeclaringClass().getName() + " is not public.");
|
||||
}
|
||||
for (Class<?> clazz : method.getParameterTypes()) {
|
||||
if (!clazz.equals(double.class)) {
|
||||
throw new IllegalArgumentException(method + " must take only double parameters");
|
||||
}
|
||||
}
|
||||
if (method.getReturnType() != double.class) {
|
||||
throw new IllegalArgumentException(method + " does not return a double.");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
File diff suppressed because it is too large
File diff suppressed because it is too large
@@ -1,106 +0,0 @@
package org.apache.lucene.expressions.js;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.util.ArrayList;
import java.util.List;

/**
 * A helper to parse the context of a variable name, which is the base variable, followed by the
 * sequence of array (integer or string indexed) and member accesses.
 */
public class XVariableContext {

  static {
    assert org.elasticsearch.Version.CURRENT.luceneVersion == org.apache.lucene.util.Version.LUCENE_4_9: "Remove this code once we upgrade to Lucene 4.10 (LUCENE-5806)";
  }

  public static enum Type {
    MEMBER,     // "dot" access
    STR_INDEX,  // brackets with a string
    INT_INDEX   // brackets with a positive integer
  }

  public final Type type;
  public final String text;
  public final int integer;

  private XVariableContext(Type c, String s, int i) {
    type = c;
    text = s;
    integer = i;
  }

  /**
   * Parses a normalized javascript variable. All strings in the variable should be single quoted,
   * and no spaces (except possibly within strings).
   */
  public static final XVariableContext[] parse(String variable) {
    char[] text = variable.toCharArray();
    List<XVariableContext> contexts = new ArrayList<>();
    int i = addMember(text, 0, contexts); // base variable is a "member" of the global namespace
    while (i < text.length) {
      if (text[i] == '[') {
        if (text[++i] == '\'') {
          i = addStringIndex(text, i, contexts);
        } else {
          i = addIntIndex(text, i, contexts);
        }
        ++i; // move past end bracket
      } else { // text[i] == '.', ie object member
        i = addMember(text, i + 1, contexts);
      }
    }
    return contexts.toArray(new XVariableContext[contexts.size()]);
  }

  // i points to start of member name
  private static int addMember(final char[] text, int i, List<XVariableContext> contexts) {
    int j = i + 1;
    while (j < text.length && text[j] != '[' && text[j] != '.') ++j; // find first array or member access
    contexts.add(new XVariableContext(Type.MEMBER, new String(text, i, j - i), -1));
    return j;
  }

  // i points to start of single quoted index
  private static int addStringIndex(final char[] text, int i, List<XVariableContext> contexts) {
    ++i; // move past quote
    int j = i;
    while (text[j] != '\'') { // find end of single quoted string
      if (text[j] == '\\') ++j; // skip over escapes
      ++j;
    }
    StringBuffer buf = new StringBuffer(j - i); // space for string, without end quote
    while (i < j) { // copy string to buffer (without begin/end quotes)
      if (text[i] == '\\') ++i; // unescape escapes
      buf.append(text[i]);
      ++i;
    }
    contexts.add(new XVariableContext(Type.STR_INDEX, buf.toString(), -1));
    return j + 1; // move past quote, return end bracket location
  }

  // i points to start of integer index
  private static int addIntIndex(final char[] text, int i, List<XVariableContext> contexts) {
    int j = i + 1;
    while (text[j] != ']') ++j; // find end of array access
    int index = Integer.parseInt(new String(text, i, j - i));
    contexts.add(new XVariableContext(Type.INT_INDEX, null, index));
    return j;
  }
}
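Read together, parse(), addMember(), addStringIndex() and addIntIndex() above turn a normalized variable into one context per access. A short hypothetical driver (illustrative only, not part of this commit), with the output that follows from tracing the parsing code:

import org.apache.lucene.expressions.js.XVariableContext;

public final class VariableContextExample {
    public static void main(String[] args) {
        // Base member, single-quoted string index, integer index, then a member access.
        XVariableContext[] contexts = XVariableContext.parse("doc['title'][0].length");
        for (XVariableContext context : contexts) {
            System.out.println(context.type + " " + context.text + " " + context.integer);
        }
        // Expected, following the code above:
        //   MEMBER doc -1
        //   STR_INDEX title -1
        //   INT_INDEX null 0
        //   MEMBER length -1
    }
}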
@@ -1,306 +0,0 @@
|
|||
package org.apache.lucene.index;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
|
||||
import org.apache.lucene.index.MultiTermsEnum.TermsEnumIndex;
|
||||
import org.apache.lucene.index.MultiTermsEnum.TermsEnumWithSlice;
|
||||
import org.apache.lucene.util.Accountable;
|
||||
import org.apache.lucene.util.InPlaceMergeSorter;
|
||||
import org.apache.lucene.util.LongValues;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
import org.apache.lucene.util.packed.AppendingPackedLongBuffer;
|
||||
import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer;
|
||||
import org.apache.lucene.util.packed.PackedInts;
|
||||
|
||||
/** maps per-segment ordinals to/from global ordinal space */
|
||||
// TODO: we could also have a utility method to merge Terms[] and use size() as a weight when we need it
|
||||
// TODO: use more efficient packed ints structures?
|
||||
// TODO: pull this out? its pretty generic (maps between N ord()-enabled TermsEnums)
|
||||
public class XOrdinalMap implements Accountable {
|
||||
|
||||
static {
|
||||
assert org.elasticsearch.Version.CURRENT.luceneVersion == org.apache.lucene.util.Version.LUCENE_4_9: "Remove this code once we upgrade to Lucene 4.10 (LUCENE-5780, LUCENE-5782)";
|
||||
}
|
||||
|
||||
private static class SegmentMap implements Accountable {
|
||||
private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(SegmentMap.class);
|
||||
|
||||
/** Build a map from an index into a sorted view of `weights` to an index into `weights`. */
|
||||
private static int[] map(final long[] weights) {
|
||||
final int[] newToOld = new int[weights.length];
|
||||
for (int i = 0; i < weights.length; ++i) {
|
||||
newToOld[i] = i;
|
||||
}
|
||||
new InPlaceMergeSorter() {
|
||||
@Override
|
||||
protected void swap(int i, int j) {
|
||||
final int tmp = newToOld[i];
|
||||
newToOld[i] = newToOld[j];
|
||||
newToOld[j] = tmp;
|
||||
}
|
||||
@Override
|
||||
protected int compare(int i, int j) {
|
||||
// j first since we actually want higher weights first
|
||||
return Long.compare(weights[newToOld[j]], weights[newToOld[i]]);
|
||||
}
|
||||
}.sort(0, weights.length);
|
||||
return newToOld;
|
||||
}
|
||||
|
||||
/** Inverse the map. */
|
||||
private static int[] inverse(int[] map) {
|
||||
final int[] inverse = new int[map.length];
|
||||
for (int i = 0; i < map.length; ++i) {
|
||||
inverse[map[i]] = i;
|
||||
}
|
||||
return inverse;
|
||||
}
|
||||
|
||||
private final int[] newToOld, oldToNew;
|
||||
|
||||
SegmentMap(long[] weights) {
|
||||
newToOld = map(weights);
|
||||
oldToNew = inverse(newToOld);
|
||||
assert Arrays.equals(newToOld, inverse(oldToNew));
|
||||
}
|
||||
|
||||
int newToOld(int segment) {
|
||||
return newToOld[segment];
|
||||
}
|
||||
|
||||
int oldToNew(int segment) {
|
||||
return oldToNew[segment];
|
||||
}
|
||||
|
||||
@Override
|
||||
public long ramBytesUsed() {
|
||||
return BASE_RAM_BYTES_USED + RamUsageEstimator.sizeOf(newToOld) + RamUsageEstimator.sizeOf(oldToNew);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Create an ordinal map that uses the number of unique values of each
|
||||
* {@link SortedDocValues} instance as a weight.
|
||||
* @see #build(Object, TermsEnum[], long[], float)
|
||||
*/
|
||||
public static XOrdinalMap build(Object owner, SortedDocValues[] values, float acceptableOverheadRatio) throws IOException {
|
||||
final TermsEnum[] subs = new TermsEnum[values.length];
|
||||
final long[] weights = new long[values.length];
|
||||
for (int i = 0; i < values.length; ++i) {
|
||||
subs[i] = values[i].termsEnum();
|
||||
weights[i] = values[i].getValueCount();
|
||||
}
|
||||
return build(owner, subs, weights, acceptableOverheadRatio);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create an ordinal map that uses the number of unique values of each
|
||||
* {@link SortedSetDocValues} instance as a weight.
|
||||
* @see #build(Object, TermsEnum[], long[], float)
|
||||
*/
|
||||
public static XOrdinalMap build(Object owner, SortedSetDocValues[] values, float acceptableOverheadRatio) throws IOException {
|
||||
final TermsEnum[] subs = new TermsEnum[values.length];
|
||||
final long[] weights = new long[values.length];
|
||||
for (int i = 0; i < values.length; ++i) {
|
||||
subs[i] = values[i].termsEnum();
|
||||
weights[i] = values[i].getValueCount();
|
||||
}
|
||||
return build(owner, subs, weights, acceptableOverheadRatio);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates an ordinal map that allows mapping ords to/from a merged
|
||||
* space from <code>subs</code>.
|
||||
* @param owner a cache key
|
||||
* @param subs TermsEnums that support {@link TermsEnum#ord()}. They need
|
||||
* not be dense (e.g. can be FilteredTermsEnums}.
|
||||
* @param weights a weight for each sub. This is ideally correlated with
|
||||
* the number of unique terms that each sub introduces compared
|
||||
* to the other subs
|
||||
* @throws IOException if an I/O error occurred.
|
||||
*/
|
||||
public static XOrdinalMap build(Object owner, TermsEnum subs[], long[] weights, float acceptableOverheadRatio) throws IOException {
|
||||
if (subs.length != weights.length) {
|
||||
throw new IllegalArgumentException("subs and weights must have the same length");
|
||||
}
|
||||
|
||||
// enums are not sorted, so let's sort to save memory
|
||||
final SegmentMap segmentMap = new SegmentMap(weights);
|
||||
return new XOrdinalMap(owner, subs, segmentMap, acceptableOverheadRatio);
|
||||
}
|
||||
|
||||
private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(XOrdinalMap.class);
|
||||
|
||||
// cache key of whoever asked for this awful thing
|
||||
final Object owner;
|
||||
// globalOrd -> (globalOrd - segmentOrd) where segmentOrd is the the ordinal in the first segment that contains this term
|
||||
final MonotonicAppendingLongBuffer globalOrdDeltas;
|
||||
// globalOrd -> first segment container
|
||||
final AppendingPackedLongBuffer firstSegments;
|
||||
// for every segment, segmentOrd -> globalOrd
|
||||
final LongValues segmentToGlobalOrds[];
|
||||
// the map from/to segment ids
|
||||
final SegmentMap segmentMap;
|
||||
// ram usage
|
||||
final long ramBytesUsed;
|
||||
|
||||
XOrdinalMap(Object owner, TermsEnum subs[], SegmentMap segmentMap, float acceptableOverheadRatio) throws IOException {
|
||||
// create the ordinal mappings by pulling a termsenum over each sub's
|
||||
// unique terms, and walking a multitermsenum over those
|
||||
this.owner = owner;
|
||||
this.segmentMap = segmentMap;
|
||||
// even though we accept an overhead ratio, we keep these ones with COMPACT
|
||||
// since they are only used to resolve values given a global ord, which is
|
||||
// slow anyway
|
||||
globalOrdDeltas = new MonotonicAppendingLongBuffer(PackedInts.COMPACT);
|
||||
firstSegments = new AppendingPackedLongBuffer(PackedInts.COMPACT);
|
||||
final MonotonicAppendingLongBuffer[] ordDeltas = new MonotonicAppendingLongBuffer[subs.length];
|
||||
for (int i = 0; i < ordDeltas.length; i++) {
|
||||
ordDeltas[i] = new MonotonicAppendingLongBuffer(acceptableOverheadRatio);
|
||||
}
|
||||
long[] ordDeltaBits = new long[subs.length];
|
||||
long segmentOrds[] = new long[subs.length];
|
||||
ReaderSlice slices[] = new ReaderSlice[subs.length];
|
||||
TermsEnumIndex indexes[] = new TermsEnumIndex[slices.length];
|
||||
for (int i = 0; i < slices.length; i++) {
|
||||
slices[i] = new ReaderSlice(0, 0, i);
|
||||
indexes[i] = new TermsEnumIndex(subs[segmentMap.newToOld(i)], i);
|
||||
}
|
||||
MultiTermsEnum mte = new MultiTermsEnum(slices);
|
||||
mte.reset(indexes);
|
||||
long globalOrd = 0;
|
||||
while (mte.next() != null) {
|
||||
TermsEnumWithSlice matches[] = mte.getMatchArray();
|
||||
int firstSegmentIndex = Integer.MAX_VALUE;
|
||||
long globalOrdDelta = Long.MAX_VALUE;
|
||||
for (int i = 0; i < mte.getMatchCount(); i++) {
|
||||
int segmentIndex = matches[i].index;
|
||||
long segmentOrd = matches[i].terms.ord();
|
||||
long delta = globalOrd - segmentOrd;
|
||||
// We compute the least segment where the term occurs. In case the
|
||||
// first segment contains most (or better all) values, this will
|
||||
// help save significant memory
|
||||
if (segmentIndex < firstSegmentIndex) {
|
||||
firstSegmentIndex = segmentIndex;
|
||||
globalOrdDelta = delta;
|
||||
}
|
||||
// for each per-segment ord, map it back to the global term.
|
||||
while (segmentOrds[segmentIndex] <= segmentOrd) {
|
||||
ordDeltaBits[segmentIndex] |= delta;
|
||||
ordDeltas[segmentIndex].add(delta);
|
||||
segmentOrds[segmentIndex]++;
|
||||
}
|
||||
}
|
||||
// for each unique term, just mark the first segment index/delta where it occurs
|
||||
assert firstSegmentIndex < segmentOrds.length;
|
||||
firstSegments.add(firstSegmentIndex);
|
||||
globalOrdDeltas.add(globalOrdDelta);
|
||||
globalOrd++;
|
||||
}
|
||||
firstSegments.freeze();
|
||||
globalOrdDeltas.freeze();
|
||||
for (int i = 0; i < ordDeltas.length; ++i) {
|
||||
ordDeltas[i].freeze();
|
||||
}
|
||||
// ordDeltas is typically the bottleneck, so let's see what we can do to make it faster
|
||||
segmentToGlobalOrds = new LongValues[subs.length];
|
||||
long ramBytesUsed = BASE_RAM_BYTES_USED + globalOrdDeltas.ramBytesUsed()
|
||||
+ firstSegments.ramBytesUsed() + RamUsageEstimator.shallowSizeOf(segmentToGlobalOrds)
|
||||
+ segmentMap.ramBytesUsed();
|
||||
for (int i = 0; i < ordDeltas.length; ++i) {
|
||||
final MonotonicAppendingLongBuffer deltas = ordDeltas[i];
|
||||
if (ordDeltaBits[i] == 0L) {
|
||||
// segment ords perfectly match global ordinals
|
||||
// likely in case of low cardinalities and large segments
|
||||
segmentToGlobalOrds[i] = LongValues.IDENTITY;
|
||||
} else {
|
||||
final int bitsRequired = ordDeltaBits[i] < 0 ? 64 : PackedInts.bitsRequired(ordDeltaBits[i]);
|
||||
final long monotonicBits = deltas.ramBytesUsed() * 8;
|
||||
final long packedBits = bitsRequired * deltas.size();
|
||||
if (deltas.size() <= Integer.MAX_VALUE
|
||||
&& packedBits <= monotonicBits * (1 + acceptableOverheadRatio)) {
|
||||
// monotonic compression mostly adds overhead, let's keep the mapping in plain packed ints
|
||||
final int size = (int) deltas.size();
|
||||
final PackedInts.Mutable newDeltas = PackedInts.getMutable(size, bitsRequired, acceptableOverheadRatio);
|
||||
final MonotonicAppendingLongBuffer.Iterator it = deltas.iterator();
|
||||
for (int ord = 0; ord < size; ++ord) {
|
||||
newDeltas.set(ord, it.next());
|
||||
}
|
||||
assert !it.hasNext();
|
||||
segmentToGlobalOrds[i] = new LongValues() {
|
||||
@Override
|
||||
public long get(long ord) {
|
||||
return ord + newDeltas.get((int) ord);
|
||||
}
|
||||
};
|
||||
ramBytesUsed += newDeltas.ramBytesUsed();
|
||||
} else {
|
||||
segmentToGlobalOrds[i] = new LongValues() {
|
||||
@Override
|
||||
public long get(long ord) {
|
||||
return ord + deltas.get(ord);
|
||||
}
|
||||
};
|
||||
ramBytesUsed += deltas.ramBytesUsed();
|
||||
}
|
||||
ramBytesUsed += RamUsageEstimator.shallowSizeOf(segmentToGlobalOrds[i]);
|
||||
}
|
||||
}
|
||||
this.ramBytesUsed = ramBytesUsed;
|
||||
}
|
||||
|
||||
/**
|
||||
* Given a segment number, return a {@link LongValues} instance that maps
|
||||
* segment ordinals to global ordinals.
|
||||
*/
|
||||
public LongValues getGlobalOrds(int segmentIndex) {
|
||||
return segmentToGlobalOrds[segmentMap.oldToNew(segmentIndex)];
|
||||
}
|
||||
|
||||
/**
|
||||
* Given global ordinal, returns the ordinal of the first segment which contains
|
||||
* this ordinal (the corresponding to the segment return {@link #getFirstSegmentNumber}).
|
||||
*/
|
||||
public long getFirstSegmentOrd(long globalOrd) {
|
||||
return globalOrd - globalOrdDeltas.get(globalOrd);
|
||||
}
|
||||
|
||||
/**
|
||||
* Given a global ordinal, returns the index of the first
|
||||
* segment that contains this term.
|
||||
*/
|
||||
public int getFirstSegmentNumber(long globalOrd) {
|
||||
return segmentMap.newToOld((int) firstSegments.get(globalOrd));
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the total number of unique terms in global ord space.
|
||||
*/
|
||||
public long getValueCount() {
|
||||
return globalOrdDeltas.size();
|
||||
}
|
||||
|
||||
@Override
|
||||
public long ramBytesUsed() {
|
||||
return ramBytesUsed;
|
||||
}
|
||||
}
|
|
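The XOrdinalMap section above exposes a small surface: build(...) over per-segment doc values, getGlobalOrds(int) for segment-to-global mapping, and the getFirstSegment* reverse lookups. A hedged usage sketch follows (the helper and its name are hypothetical, and PackedInts.DEFAULT as the acceptable overhead ratio is an assumption, not something this diff prescribes):

import java.io.IOException;

import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.XOrdinalMap;
import org.apache.lucene.util.LongValues;
import org.apache.lucene.util.packed.PackedInts;

public final class GlobalOrdinalExample {

    /** Maps a segment-local ordinal into the merged global ordinal space. */
    public static long toGlobalOrd(Object cacheKey, SortedDocValues[] perSegment,
                                   int segmentIndex, int segmentOrd) throws IOException {
        XOrdinalMap map = XOrdinalMap.build(cacheKey, perSegment, PackedInts.DEFAULT);
        LongValues segmentToGlobal = map.getGlobalOrds(segmentIndex);
        return segmentToGlobal.get(segmentOrd);
    }
}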
@@ -26,7 +26,9 @@ import org.apache.lucene.search.suggest.InputIterator;
 import org.apache.lucene.search.suggest.Lookup;
 import org.apache.lucene.store.*;
 import org.apache.lucene.util.*;
-import org.apache.lucene.util.automaton.*;
+import org.apache.lucene.util.automaton.Automaton;
+import org.apache.lucene.util.automaton.Operations;
+import org.apache.lucene.util.automaton.Transition;
 import org.apache.lucene.util.fst.*;
 import org.apache.lucene.util.fst.FST.BytesReader;
 import org.apache.lucene.util.fst.PairOutputs.Pair;
@@ -254,67 +256,95 @@ public class XAnalyzingSuggester extends Lookup {
|
|||
return fst == null ? 0 : fst.ramBytesUsed();
|
||||
}
|
||||
|
||||
private static void copyDestTransitions(State from, State to, List<Transition> transitions) {
|
||||
if (to.isAccept()) {
|
||||
from.setAccept(true);
|
||||
}
|
||||
for(Transition t : to.getTransitions()) {
|
||||
transitions.add(t);
|
||||
}
|
||||
}
|
||||
|
||||
// Replaces SEP with epsilon or remaps them if
|
||||
// we were asked to preserve them:
|
||||
private static void replaceSep(Automaton a, boolean preserveSep, int replaceSep) {
|
||||
private Automaton replaceSep(Automaton a) {
|
||||
|
||||
State[] states = a.getNumberedStates();
|
||||
Automaton result = new Automaton();
|
||||
|
||||
// Go in reverse topo sort so we know we only have to
|
||||
// make one pass:
|
||||
for(int stateNumber=states.length-1;stateNumber >=0;stateNumber--) {
|
||||
final State state = states[stateNumber];
|
||||
List<Transition> newTransitions = new ArrayList<>();
|
||||
for(Transition t : state.getTransitions()) {
|
||||
assert t.getMin() == t.getMax();
|
||||
if (t.getMin() == TokenStreamToAutomaton.POS_SEP) {
|
||||
if (preserveSep) {
|
||||
// Remap to SEP_LABEL:
|
||||
newTransitions.add(new Transition(replaceSep, t.getDest()));
|
||||
// Copy all states over
|
||||
int numStates = a.getNumStates();
|
||||
for(int s=0;s<numStates;s++) {
|
||||
result.createState();
|
||||
result.setAccept(s, a.isAccept(s));
|
||||
}
|
||||
|
||||
// Go in reverse topo sort so we know we only have to
|
||||
// make one pass:
|
||||
Transition t = new Transition();
|
||||
int[] topoSortStates = topoSortStates(a);
|
||||
for(int i=0;i<topoSortStates.length;i++) {
|
||||
int state = topoSortStates[topoSortStates.length-1-i];
|
||||
int count = a.initTransition(state, t);
|
||||
for(int j=0;j<count;j++) {
|
||||
a.getNextTransition(t);
|
||||
if (t.min == TokenStreamToAutomaton.POS_SEP) {
|
||||
assert t.max == TokenStreamToAutomaton.POS_SEP;
|
||||
if (preserveSep) {
|
||||
// Remap to SEP_LABEL:
|
||||
result.addTransition(state, t.dest, SEP_LABEL);
|
||||
} else {
|
||||
result.addEpsilon(state, t.dest);
|
||||
}
|
||||
} else if (t.min == TokenStreamToAutomaton.HOLE) {
|
||||
assert t.max == TokenStreamToAutomaton.HOLE;
|
||||
|
||||
// Just remove the hole: there will then be two
|
||||
// SEP tokens next to each other, which will only
|
||||
// match another hole at search time. Note that
|
||||
// it will also match an empty-string token ... if
|
||||
// that's somehow a problem we can always map HOLE
|
||||
// to a dedicated byte (and escape it in the
|
||||
// input).
|
||||
result.addEpsilon(state, t.dest);
|
||||
} else {
|
||||
copyDestTransitions(state, t.getDest(), newTransitions);
|
||||
a.setDeterministic(false);
|
||||
result.addTransition(state, t.dest, t.min, t.max);
|
||||
}
|
||||
} else if (t.getMin() == TokenStreamToAutomaton.HOLE) {
|
||||
|
||||
// Just remove the hole: there will then be two
|
||||
// SEP tokens next to each other, which will only
|
||||
// match another hole at search time. Note that
|
||||
// it will also match an empty-string token ... if
|
||||
// that's somehow a problem we can always map HOLE
|
||||
// to a dedicated byte (and escape it in the
|
||||
// input).
|
||||
copyDestTransitions(state, t.getDest(), newTransitions);
|
||||
a.setDeterministic(false);
|
||||
} else {
|
||||
newTransitions.add(t);
|
||||
}
|
||||
}
|
||||
state.setTransitions(newTransitions.toArray(new Transition[newTransitions.size()]));
|
||||
}
|
||||
|
||||
result.finishState();
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
protected Automaton convertAutomaton(Automaton a) {
|
||||
if (queryPrefix != null) {
|
||||
a = Automaton.concatenate(Arrays.asList(queryPrefix, a));
|
||||
BasicOperations.determinize(a);
|
||||
a = Operations.concatenate(Arrays.asList(queryPrefix, a));
|
||||
a = Operations.determinize(a);
|
||||
}
|
||||
return a;
|
||||
}
|
||||
|
||||
private int[] topoSortStates(Automaton a) {
|
||||
int[] states = new int[a.getNumStates()];
|
||||
final Set<Integer> visited = new HashSet<>();
|
||||
final LinkedList<Integer> worklist = new LinkedList<>();
|
||||
worklist.add(0);
|
||||
visited.add(0);
|
||||
int upto = 0;
|
||||
states[upto] = 0;
|
||||
upto++;
|
||||
Transition t = new Transition();
|
||||
while (worklist.size() > 0) {
|
||||
int s = worklist.removeFirst();
|
||||
int count = a.initTransition(s, t);
|
||||
for (int i=0;i<count;i++) {
|
||||
a.getNextTransition(t);
|
||||
if (!visited.contains(t.dest)) {
|
||||
visited.add(t.dest);
|
||||
worklist.add(t.dest);
|
||||
states[upto++] = t.dest;
|
||||
}
|
||||
}
|
||||
}
|
||||
return states;
|
||||
}
|
||||
|
||||
/** Just escapes the 0xff byte (which we still for SEP). */
|
||||
private static final class EscapingTokenStreamToAutomaton extends TokenStreamToAutomaton {
|
||||
|
||||
final BytesRef spare = new BytesRef();
|
||||
final BytesRefBuilder spare = new BytesRefBuilder();
|
||||
private char sepLabel;
|
||||
|
||||
public EscapingTokenStreamToAutomaton(char sepLabel) {
|
||||
|
@@ -327,21 +357,16 @@ public class XAnalyzingSuggester extends Lookup {
|
|||
for(int i=0;i<in.length;i++) {
|
||||
byte b = in.bytes[in.offset+i];
|
||||
if (b == (byte) sepLabel) {
|
||||
if (spare.bytes.length == upto) {
|
||||
spare.grow(upto+2);
|
||||
}
|
||||
spare.bytes[upto++] = (byte) sepLabel;
|
||||
spare.bytes[upto++] = b;
|
||||
spare.grow(upto+2);
|
||||
spare.setByteAt(upto++, (byte) sepLabel);
|
||||
spare.setByteAt(upto++, b);
|
||||
} else {
|
||||
if (spare.bytes.length == upto) {
|
||||
spare.grow(upto+1);
|
||||
}
|
||||
spare.bytes[upto++] = b;
|
||||
spare.grow(upto+1);
|
||||
spare.setByteAt(upto++, b);
|
||||
}
|
||||
}
|
||||
spare.offset = 0;
|
||||
spare.length = upto;
|
||||
return spare;
|
||||
spare.setLength(upto);
|
||||
return spare.get();
|
||||
}
|
||||
}
|
||||
|
||||
|
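For reference, a compilable sketch (not part of this commit) of the BytesRefBuilder pattern the new lines above adopt: grow() only ensures capacity, setByteAt()/setLength() replace direct writes to spare.bytes and spare.length, and get() exposes the result with offset always 0. The wrapper class, method name and sample bytes are invented for illustration.

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;

public class SepEscapeSketch {
    // Doubles every occurrence of sepLabel, mirroring the escaping loop in the hunk above.
    static BytesRef escape(BytesRef in, byte sepLabel) {
        BytesRefBuilder spare = new BytesRefBuilder();
        int upto = 0;
        for (int i = 0; i < in.length; i++) {
            byte b = in.bytes[in.offset + i];
            if (b == sepLabel) {
                spare.grow(upto + 2);              // capacity only; no manual bytes.length check
                spare.setByteAt(upto++, sepLabel);
                spare.setByteAt(upto++, b);
            } else {
                spare.grow(upto + 1);
                spare.setByteAt(upto++, b);
            }
        }
        spare.setLength(upto);                     // replaces "spare.length = upto"
        return spare.get();                        // a builder's data always starts at offset 0
    }

    public static void main(String[] args) {
        System.out.println(escape(new BytesRef(new byte[] {1, (byte) 0xff, 2}), (byte) 0xff));
    }
}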
@ -427,7 +452,7 @@ public class XAnalyzingSuggester extends Lookup {
|
|||
|
||||
OfflineSorter.ByteSequencesWriter writer = new OfflineSorter.ByteSequencesWriter(tempInput);
|
||||
OfflineSorter.ByteSequencesReader reader = null;
|
||||
BytesRef scratch = new BytesRef();
|
||||
BytesRefBuilder scratch = new BytesRefBuilder();
|
||||
|
||||
TokenStreamToAutomaton ts2a = getTokenStreamToAutomaton();
|
||||
|
||||
|
@ -448,10 +473,10 @@ public class XAnalyzingSuggester extends Lookup {
|
|||
Util.toBytesRef(path, scratch);
|
||||
|
||||
// length of the analyzed text (FST input)
|
||||
if (scratch.length > Short.MAX_VALUE-2) {
|
||||
throw new IllegalArgumentException("cannot handle analyzed forms > " + (Short.MAX_VALUE-2) + " in length (got " + scratch.length + ")");
|
||||
if (scratch.length() > Short.MAX_VALUE-2) {
|
||||
throw new IllegalArgumentException("cannot handle analyzed forms > " + (Short.MAX_VALUE-2) + " in length (got " + scratch.length() + ")");
|
||||
}
|
||||
short analyzedLength = (short) scratch.length;
|
||||
short analyzedLength = (short) scratch.length();
|
||||
|
||||
// compute the required length:
|
||||
// analyzed sequence + weight (4) + surface + analyzedLength (short)
|
||||
|
@ -476,7 +501,7 @@ public class XAnalyzingSuggester extends Lookup {
|
|||
|
||||
output.writeShort(analyzedLength);
|
||||
|
||||
output.writeBytes(scratch.bytes, scratch.offset, scratch.length);
|
||||
output.writeBytes(scratch.bytes(), 0, scratch.length());
|
||||
|
||||
output.writeInt(encodeWeight(iterator.weight()));
|
||||
|
||||
|
@ -513,10 +538,10 @@ public class XAnalyzingSuggester extends Lookup {
|
|||
Builder<Pair<Long,BytesRef>> builder = new Builder<>(FST.INPUT_TYPE.BYTE1, outputs);
|
||||
|
||||
// Build FST:
|
||||
BytesRef previousAnalyzed = null;
|
||||
BytesRef analyzed = new BytesRef();
|
||||
BytesRefBuilder previousAnalyzed = null;
|
||||
BytesRefBuilder analyzed = new BytesRefBuilder();
|
||||
BytesRef surface = new BytesRef();
|
||||
IntsRef scratchInts = new IntsRef();
|
||||
IntsRefBuilder scratchInts = new IntsRefBuilder();
|
||||
ByteArrayDataInput input = new ByteArrayDataInput();
|
||||
|
||||
// Used to remove duplicate surface forms (but we
|
||||
|
@ -527,28 +552,28 @@ public class XAnalyzingSuggester extends Lookup {
|
|||
|
||||
int dedup = 0;
|
||||
while (reader.read(scratch)) {
|
||||
input.reset(scratch.bytes, scratch.offset, scratch.length);
|
||||
input.reset(scratch.bytes(), 0, scratch.length());
|
||||
short analyzedLength = input.readShort();
|
||||
analyzed.grow(analyzedLength+2);
|
||||
input.readBytes(analyzed.bytes, 0, analyzedLength);
|
||||
analyzed.length = analyzedLength;
|
||||
input.readBytes(analyzed.bytes(), 0, analyzedLength);
|
||||
analyzed.setLength(analyzedLength);
|
||||
|
||||
long cost = input.readInt();
|
||||
|
||||
surface.bytes = scratch.bytes;
|
||||
surface.bytes = scratch.bytes();
|
||||
if (hasPayloads) {
|
||||
surface.length = input.readShort();
|
||||
surface.offset = input.getPosition();
|
||||
} else {
|
||||
surface.offset = input.getPosition();
|
||||
surface.length = scratch.length - surface.offset;
|
||||
surface.length = scratch.length() - surface.offset;
|
||||
}
|
||||
|
||||
if (previousAnalyzed == null) {
|
||||
previousAnalyzed = new BytesRef();
|
||||
previousAnalyzed = new BytesRefBuilder();
|
||||
previousAnalyzed.copyBytes(analyzed);
|
||||
seenSurfaceForms.add(BytesRef.deepCopyOf(surface));
|
||||
} else if (analyzed.equals(previousAnalyzed)) {
|
||||
} else if (analyzed.get().equals(previousAnalyzed.get())) {
|
||||
dedup++;
|
||||
if (dedup >= maxSurfaceFormsPerAnalyzedForm) {
|
||||
// More than maxSurfaceFormsPerAnalyzedForm
|
||||
|
@ -574,23 +599,22 @@ public class XAnalyzingSuggester extends Lookup {
|
|||
|
||||
// NOTE: must be byte 0 so we sort before whatever
|
||||
// is next
|
||||
analyzed.bytes[analyzed.offset+analyzed.length] = 0;
|
||||
analyzed.bytes[analyzed.offset+analyzed.length+1] = (byte) dedup;
|
||||
analyzed.length += 2;
|
||||
analyzed.append((byte) 0);
|
||||
analyzed.append((byte) dedup);
|
||||
|
||||
Util.toIntsRef(analyzed, scratchInts);
|
||||
Util.toIntsRef(analyzed.get(), scratchInts);
|
||||
//System.out.println("ADD: " + scratchInts + " -> " + cost + ": " + surface.utf8ToString());
|
||||
if (!hasPayloads) {
|
||||
builder.add(scratchInts, outputs.newPair(cost, BytesRef.deepCopyOf(surface)));
|
||||
builder.add(scratchInts.get(), outputs.newPair(cost, BytesRef.deepCopyOf(surface)));
|
||||
} else {
|
||||
int payloadOffset = input.getPosition() + surface.length;
|
||||
int payloadLength = scratch.length - payloadOffset;
|
||||
int payloadLength = scratch.length() - payloadOffset;
|
||||
BytesRef br = new BytesRef(surface.length + 1 + payloadLength);
|
||||
System.arraycopy(surface.bytes, surface.offset, br.bytes, 0, surface.length);
|
||||
br.bytes[surface.length] = (byte) payloadSep;
|
||||
System.arraycopy(scratch.bytes, payloadOffset, br.bytes, surface.length+1, payloadLength);
|
||||
System.arraycopy(scratch.bytes(), payloadOffset, br.bytes, surface.length+1, payloadLength);
|
||||
br.length = br.bytes.length;
|
||||
builder.add(scratchInts, outputs.newPair(cost, br));
|
||||
builder.add(scratchInts.get(), outputs.newPair(cost, br));
|
||||
}
|
||||
}
|
||||
fst = builder.finish();
|
||||
|
@ -647,7 +671,7 @@ public class XAnalyzingSuggester extends Lookup {
|
|||
return true;
|
||||
}
|
||||
|
||||
private LookupResult getLookupResult(Long output1, BytesRef output2, CharsRef spare) {
|
||||
private LookupResult getLookupResult(Long output1, BytesRef output2, CharsRefBuilder spare) {
|
||||
LookupResult result;
|
||||
if (hasPayloads) {
|
||||
int sepIndex = -1;
|
||||
|
@@ -658,16 +682,14 @@ public class XAnalyzingSuggester extends Lookup {
}
}
assert sepIndex != -1;
spare.grow(sepIndex);
final int payloadLen = output2.length - sepIndex - 1;
UnicodeUtil.UTF8toUTF16(output2.bytes, output2.offset, sepIndex, spare);
spare.copyUTF8Bytes(output2.bytes, output2.offset, sepIndex);
BytesRef payload = new BytesRef(payloadLen);
System.arraycopy(output2.bytes, sepIndex+1, payload.bytes, 0, payloadLen);
payload.length = payloadLen;
result = new LookupResult(spare.toString(), decodeWeight(output1), payload);
} else {
spare.grow(output2.length);
UnicodeUtil.UTF8toUTF16(output2, spare);
spare.copyUTF8Bytes(output2);
result = new LookupResult(spare.toString(), decodeWeight(output1));
}

@@ -716,7 +738,7 @@ public class XAnalyzingSuggester extends Lookup {

Automaton lookupAutomaton = toLookupAutomaton(key);

final CharsRef spare = new CharsRef();
final CharsRefBuilder spare = new CharsRefBuilder();

//System.out.println("  now intersect exactFirst=" + exactFirst);
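A minimal sketch (not from this commit) of the CharsRefBuilder usage adopted in getLookupResult above: copyUTF8Bytes() sizes the builder itself and replaces the explicit grow() plus UnicodeUtil.UTF8toUTF16() pair. The sample value is invented.

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRefBuilder;

public class Utf8ToStringSketch {
    public static void main(String[] args) {
        BytesRef output2 = new BytesRef("some surface form");  // UTF-8 bytes, e.g. an FST output
        CharsRefBuilder spare = new CharsRefBuilder();
        // Decode the whole BytesRef; an (offset, length) overload exists for slicing
        // off a prefix before a payload separator, as the hunk above does.
        spare.copyUTF8Bytes(output2);
        System.out.println(spare.toString());
    }
}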
@ -888,20 +910,28 @@ public class XAnalyzingSuggester extends Lookup {
|
|||
}
|
||||
|
||||
public final Set<IntsRef> toFiniteStrings(final BytesRef surfaceForm, final TokenStreamToAutomaton ts2a) throws IOException {
|
||||
// Analyze surface form:
|
||||
TokenStream ts = indexAnalyzer.tokenStream("", surfaceForm.utf8ToString());
|
||||
return toFiniteStrings(ts2a, ts);
|
||||
}
|
||||
public final Set<IntsRef> toFiniteStrings(final TokenStreamToAutomaton ts2a, TokenStream ts) throws IOException {
|
||||
// Analyze surface form:
|
||||
TokenStream ts = indexAnalyzer.tokenStream("", surfaceForm.utf8ToString());
|
||||
return toFiniteStrings(ts2a, ts);
|
||||
}
|
||||
|
||||
public final Set<IntsRef> toFiniteStrings(final TokenStreamToAutomaton ts2a, final TokenStream ts) throws IOException {
|
||||
Automaton automaton = null;
|
||||
try {
|
||||
|
||||
// Create corresponding automaton: labels are bytes
|
||||
// from each analyzed token, with byte 0 used as
|
||||
// separator between tokens:
|
||||
Automaton automaton = ts2a.toAutomaton(ts);
|
||||
ts.close();
|
||||
// Create corresponding automaton: labels are bytes
|
||||
// from each analyzed token, with byte 0 used as
|
||||
// separator between tokens:
|
||||
automaton = ts2a.toAutomaton(ts);
|
||||
} finally {
|
||||
IOUtils.closeWhileHandlingException(ts);
|
||||
}
|
||||
|
||||
replaceSep(automaton, preserveSep, sepLabel);
|
||||
automaton = replaceSep(automaton);
|
||||
automaton = convertAutomaton(automaton);
|
||||
|
||||
// TODO: LUCENE-5660 re-enable this once we disallow massive suggestion strings
|
||||
// assert SpecialOperations.isFinite(automaton);
|
||||
|
||||
// Get all paths from the automaton (there can be
|
||||
// more than one path, eg if the analyzer created a
|
||||
|
@@ -910,27 +940,27 @@ public class XAnalyzingSuggester extends Lookup {
// TODO: we could walk & add simultaneously, so we
// don't have to alloc [possibly biggish]
// intermediate HashSet in RAM:
return SpecialOperations.getFiniteStrings(automaton, maxGraphExpansions);

return Operations.getFiniteStrings(automaton, maxGraphExpansions);
}

final Automaton toLookupAutomaton(final CharSequence key) throws IOException {
// Turn tokenstream into automaton:
TokenStream ts = queryAnalyzer.tokenStream("", key.toString());
Automaton automaton = (getTokenStreamToAutomaton()).toAutomaton(ts);
ts.close();
// TODO: is there a Reader from a CharSequence?
// Turn tokenstream into automaton:
Automaton automaton = null;
TokenStream ts = queryAnalyzer.tokenStream("", key.toString());
try {
automaton = getTokenStreamToAutomaton().toAutomaton(ts);
} finally {
IOUtils.closeWhileHandlingException(ts);
}

// TODO: we could use the end offset to "guess"
// whether the final token was a partial token; this
// would only be a heuristic ... but maybe an OK one.
// This way we could eg differentiate "net" from "net ",
// which we can't today...
automaton = replaceSep(automaton);

replaceSep(automaton, preserveSep, sepLabel);

// TODO: we can optimize this somewhat by determinizing
// while we convert
BasicOperations.determinize(automaton);
return automaton;
// TODO: we can optimize this somewhat by determinizing
// while we convert
automaton = Operations.determinize(automaton);
return automaton;
}
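A small sketch (not part of the commit) of the Lucene 4.10 automaton API this hunk migrates to: BasicOperations/SpecialOperations calls become Operations calls, and determinize() returns a new Automaton rather than mutating its argument. The example automaton is invented.

import java.util.Set;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.automaton.Automata;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.Operations;

public class AutomatonOpsSketch {
    public static void main(String[] args) {
        Automaton a = Automata.makeString("net");                // accepts exactly "net"
        a = Operations.determinize(a);                           // returns the determinized automaton
        Set<IntsRef> paths = Operations.getFiniteStrings(a, -1); // -1: no expansion limit
        System.out.println("accepted paths: " + paths.size());   // 1
    }
}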
@ -967,10 +997,10 @@ public class XAnalyzingSuggester extends Lookup {
|
|||
public static class XBuilder {
|
||||
private Builder<Pair<Long, BytesRef>> builder;
|
||||
private int maxSurfaceFormsPerAnalyzedForm;
|
||||
private IntsRef scratchInts = new IntsRef();
|
||||
private IntsRefBuilder scratchInts = new IntsRefBuilder();
|
||||
private final PairOutputs<Long, BytesRef> outputs;
|
||||
private boolean hasPayloads;
|
||||
private BytesRef analyzed = new BytesRef();
|
||||
private BytesRefBuilder analyzed = new BytesRefBuilder();
|
||||
private final SurfaceFormAndPayload[] surfaceFormsAndPayload;
|
||||
private int count;
|
||||
private ObjectIntOpenHashMap<BytesRef> seenSurfaceForms = HppcMaps.Object.Integer.ensureNoNullKeys(256, 0.75f);
|
||||
|
@ -986,8 +1016,8 @@ public class XAnalyzingSuggester extends Lookup {
|
|||
|
||||
}
|
||||
public void startTerm(BytesRef analyzed) {
|
||||
this.analyzed.copyBytes(analyzed);
|
||||
this.analyzed.grow(analyzed.length+2);
|
||||
this.analyzed.copyBytes(analyzed);
|
||||
}
|
||||
|
||||
private final static class SurfaceFormAndPayload implements Comparable<SurfaceFormAndPayload> {
|
||||
|
@ -1063,14 +1093,15 @@ public class XAnalyzingSuggester extends Lookup {
|
|||
public void finishTerm(long defaultWeight) throws IOException {
|
||||
ArrayUtil.timSort(surfaceFormsAndPayload, 0, count);
|
||||
int deduplicator = 0;
|
||||
analyzed.bytes[analyzed.offset + analyzed.length] = 0;
|
||||
analyzed.length += 2;
|
||||
analyzed.append((byte) 0);
|
||||
analyzed.setLength(analyzed.length() + 1);
|
||||
analyzed.grow(analyzed.length());
|
||||
for (int i = 0; i < count; i++) {
|
||||
analyzed.bytes[analyzed.offset + analyzed.length - 1 ] = (byte) deduplicator++;
|
||||
Util.toIntsRef(analyzed, scratchInts);
|
||||
analyzed.setByteAt(analyzed.length() - 1, (byte) deduplicator++);
|
||||
Util.toIntsRef(analyzed.get(), scratchInts);
|
||||
SurfaceFormAndPayload candiate = surfaceFormsAndPayload[i];
|
||||
long cost = candiate.weight == -1 ? encodeWeight(Math.min(Integer.MAX_VALUE, defaultWeight)) : candiate.weight;
|
||||
builder.add(scratchInts, outputs.newPair(cost, candiate.payload));
|
||||
builder.add(scratchInts.get(), outputs.newPair(cost, candiate.payload));
|
||||
}
|
||||
seenSurfaceForms.clear();
|
||||
count = 0;
|
||||
|
|
|
@ -22,6 +22,7 @@ import org.apache.lucene.analysis.Analyzer;
|
|||
import org.apache.lucene.analysis.TokenStreamToAutomaton;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.IntsRef;
|
||||
import org.apache.lucene.util.UnicodeUtil;
|
||||
import org.apache.lucene.util.automaton.*;
|
||||
import org.apache.lucene.util.fst.FST;
|
||||
import org.apache.lucene.util.fst.PairOutputs;
|
||||
|
@ -204,7 +205,7 @@ public final class XFuzzySuggester extends XAnalyzingSuggester {
|
|||
if (unicodeAware) {
|
||||
// FLORIAN EDIT: get converted Automaton from superclass
|
||||
Automaton utf8automaton = new UTF32ToUTF8().convert(super.convertAutomaton(a));
|
||||
BasicOperations.determinize(utf8automaton);
|
||||
utf8automaton = Operations.determinize(utf8automaton);
|
||||
return utf8automaton;
|
||||
} else {
|
||||
return super.convertAutomaton(a);
|
||||
|
@ -219,46 +220,40 @@ public final class XFuzzySuggester extends XAnalyzingSuggester {
|
|||
}
|
||||
|
||||
Automaton toLevenshteinAutomata(Automaton automaton) {
|
||||
final Set<IntsRef> ref = SpecialOperations.getFiniteStrings(automaton, -1);
|
||||
final Set<IntsRef> ref = Operations.getFiniteStrings(automaton, -1);
|
||||
Automaton subs[] = new Automaton[ref.size()];
|
||||
int upto = 0;
|
||||
for (IntsRef path : ref) {
|
||||
if (path.length <= nonFuzzyPrefix || path.length < minFuzzyLength) {
|
||||
subs[upto] = BasicAutomata.makeString(path.ints, path.offset, path.length);
|
||||
upto++;
|
||||
} else {
|
||||
Automaton prefix = BasicAutomata.makeString(path.ints, path.offset, nonFuzzyPrefix);
|
||||
int ints[] = new int[path.length-nonFuzzyPrefix];
|
||||
System.arraycopy(path.ints, path.offset+nonFuzzyPrefix, ints, 0, ints.length);
|
||||
// TODO: maybe add alphaMin to LevenshteinAutomata,
|
||||
// and pass 1 instead of 0? We probably don't want
|
||||
// to allow the trailing dedup bytes to be
|
||||
// edited... but then 0 byte is "in general" allowed
|
||||
// on input (but not in UTF8).
|
||||
LevenshteinAutomata lev = new LevenshteinAutomata(ints, unicodeAware ? Character.MAX_CODE_POINT : 255, transpositions);
|
||||
Automaton levAutomaton = lev.toAutomaton(maxEdits);
|
||||
Automaton combined = BasicOperations.concatenate(Arrays.asList(prefix, levAutomaton));
|
||||
combined.setDeterministic(true); // its like the special case in concatenate itself, except we cloneExpanded already
|
||||
subs[upto] = combined;
|
||||
upto++;
|
||||
}
|
||||
if (path.length <= nonFuzzyPrefix || path.length < minFuzzyLength) {
|
||||
subs[upto] = Automata.makeString(path.ints, path.offset, path.length);
|
||||
upto++;
|
||||
} else {
|
||||
int ints[] = new int[path.length-nonFuzzyPrefix];
|
||||
System.arraycopy(path.ints, path.offset+nonFuzzyPrefix, ints, 0, ints.length);
|
||||
// TODO: maybe add alphaMin to LevenshteinAutomata,
|
||||
// and pass 1 instead of 0? We probably don't want
|
||||
// to allow the trailing dedup bytes to be
|
||||
// edited... but then 0 byte is "in general" allowed
|
||||
// on input (but not in UTF8).
|
||||
LevenshteinAutomata lev = new LevenshteinAutomata(ints, unicodeAware ? Character.MAX_CODE_POINT : 255, transpositions);
|
||||
subs[upto] = lev.toAutomaton(maxEdits, UnicodeUtil.newString(path.ints, path.offset, nonFuzzyPrefix));
|
||||
upto++;
|
||||
}
|
||||
}
|
||||
|
||||
if (subs.length == 0) {
|
||||
// automaton is empty, there is no accepted paths through it
|
||||
return BasicAutomata.makeEmpty(); // matches nothing
|
||||
// automaton is empty, there is no accepted paths through it
|
||||
return Automata.makeEmpty(); // matches nothing
|
||||
} else if (subs.length == 1) {
|
||||
// no synonyms or anything: just a single path through the tokenstream
|
||||
return subs[0];
|
||||
// no synonyms or anything: just a single path through the tokenstream
|
||||
return subs[0];
|
||||
} else {
|
||||
// multiple paths: this is really scary! is it slow?
|
||||
// maybe we should not do this and throw UOE?
|
||||
Automaton a = BasicOperations.union(Arrays.asList(subs));
|
||||
// TODO: we could call toLevenshteinAutomata() before det?
|
||||
// this only happens if you have multiple paths anyway (e.g. synonyms)
|
||||
BasicOperations.determinize(a);
|
||||
|
||||
return a;
|
||||
// multiple paths: this is really scary! is it slow?
|
||||
// maybe we should not do this and throw UOE?
|
||||
Automaton a = Operations.union(Arrays.asList(subs));
|
||||
// TODO: we could call toLevenshteinAutomata() before det?
|
||||
// this only happens if you have multiple paths anyway (e.g. synonyms)
|
||||
return Operations.determinize(a);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@@ -199,14 +199,14 @@ public class Version implements Serializable {
public static final int V_1_3_3_ID = /*00*/1030399;
public static final Version V_1_3_3 = new Version(V_1_3_3_ID, false, org.apache.lucene.util.Version.LUCENE_4_9);
public static final int V_1_4_0_ID = /*00*/1040099;
public static final Version V_1_4_0 = new Version(V_1_4_0_ID, false, org.apache.lucene.util.Version.LUCENE_4_9);
public static final Version V_1_4_0 = new Version(V_1_4_0_ID, false, org.apache.lucene.util.Version.LUCENE_4_10_0);
public static final int V_2_0_0_ID = /*00*/2000099;
public static final Version V_2_0_0 = new Version(V_2_0_0_ID, true, org.apache.lucene.util.Version.LUCENE_4_9);
public static final Version V_2_0_0 = new Version(V_2_0_0_ID, true, org.apache.lucene.util.Version.LUCENE_4_10_0);

public static final Version CURRENT = V_2_0_0;

static {
assert CURRENT.luceneVersion == Lucene.VERSION : "Version must be upgraded to [" + Lucene.VERSION + "] is still set to [" + CURRENT.luceneVersion + "]";
assert CURRENT.luceneVersion.equals(Lucene.VERSION) : "Version must be upgraded to [" + Lucene.VERSION + "] is still set to [" + CURRENT.luceneVersion + "]";
}

public static Version readVersion(StreamInput in) throws IOException {
@@ -22,8 +22,7 @@ package org.elasticsearch.action.search.type;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Maps;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util.CharsRefBuilder;
import org.elasticsearch.ElasticsearchIllegalArgumentException;
import org.elasticsearch.ElasticsearchIllegalStateException;
import org.elasticsearch.action.search.SearchRequest;

@@ -85,21 +84,19 @@ public abstract class TransportSearchHelper {
sb.append(entry.getKey()).append(':').append(entry.getValue()).append(';');
}
}
BytesRef bytesRef = new BytesRef();
UnicodeUtil.UTF16toUTF8(sb, 0, sb.length(), bytesRef);

BytesRef bytesRef = new BytesRef(sb);
return Base64.encodeBytes(bytesRef.bytes, bytesRef.offset, bytesRef.length, Base64.URL_SAFE);
}

public static ParsedScrollId parseScrollId(String scrollId) {
CharsRef spare = new CharsRef();
CharsRefBuilder spare = new CharsRefBuilder();
try {
byte[] decode = Base64.decode(scrollId, Base64.URL_SAFE);
UnicodeUtil.UTF8toUTF16(decode, 0, decode.length, spare);
spare.copyUTF8Bytes(decode, 0, decode.length);
} catch (Exception e) {
throw new ElasticsearchIllegalArgumentException("Failed to decode scrollId", e);
}
String[] elements = Strings.splitStringToArray(spare, ';');
String[] elements = Strings.splitStringToArray(spare.get(), ';');
if (elements.length < 2) {
throw new ElasticsearchIllegalArgumentException("Malformed scrollId [" + scrollId + "]");
}
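An illustrative round trip (not from the commit) of the simplification above: the BytesRef(CharSequence) constructor encodes UTF-16 to UTF-8 directly, and CharsRefBuilder.copyUTF8Bytes() decodes it back, replacing the UnicodeUtil helper calls. The Base64 plumbing from the hunk is omitted and the sample id is invented.

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRefBuilder;

public class ScrollIdRoundTrip {
    public static void main(String[] args) {
        String id = "queryThenFetch;node1:42;node2:43;";
        // Encode: the constructor performs the UTF-16 -> UTF-8 conversion itself.
        BytesRef encoded = new BytesRef(id);
        // Decode: copyUTF8Bytes grows the builder as needed and converts back to chars.
        CharsRefBuilder spare = new CharsRefBuilder();
        spare.copyUTF8Bytes(encoded.bytes, encoded.offset, encoded.length);
        System.out.println(id.equals(spare.toString()));   // true
    }
}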
@ -22,14 +22,12 @@ package org.elasticsearch.action.termvector;
|
|||
import com.carrotsearch.hppc.ObjectLongOpenHashMap;
|
||||
import com.carrotsearch.hppc.cursors.ObjectLongCursor;
|
||||
import org.apache.lucene.index.*;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
import org.apache.lucene.util.*;
|
||||
import org.elasticsearch.common.bytes.BytesReference;
|
||||
import org.elasticsearch.common.io.stream.BytesStreamInput;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.Comparator;
|
||||
import java.util.Iterator;
|
||||
|
||||
|
@ -227,8 +225,8 @@ public final class TermVectorFields extends Fields {
|
|||
int[] positions = new int[1];
|
||||
int[] startOffsets = new int[1];
|
||||
int[] endOffsets = new int[1];
|
||||
BytesRef[] payloads = new BytesRef[1];
|
||||
final BytesRef spare = new BytesRef();
|
||||
BytesRefBuilder[] payloads = new BytesRefBuilder[1];
|
||||
final BytesRefBuilder spare = new BytesRefBuilder();
|
||||
|
||||
@Override
|
||||
public BytesRef next() throws IOException {
|
||||
|
@ -237,8 +235,8 @@ public final class TermVectorFields extends Fields {
|
|||
int termVectorSize = perFieldTermVectorInput.readVInt();
|
||||
spare.grow(termVectorSize);
|
||||
// ...then the value.
|
||||
perFieldTermVectorInput.readBytes(spare.bytes, 0, termVectorSize);
|
||||
spare.length = termVectorSize;
|
||||
perFieldTermVectorInput.readBytes(spare.bytes(), 0, termVectorSize);
|
||||
spare.setLength(termVectorSize);
|
||||
if (hasTermStatistic) {
|
||||
docFreq = readPotentiallyNegativeVInt(perFieldTermVectorInput);
|
||||
totalTermFrequency = readPotentiallyNegativeVLong(perFieldTermVectorInput);
|
||||
|
@ -253,7 +251,7 @@ public final class TermVectorFields extends Fields {
|
|||
// curentPosition etc. so that we can just iterate
|
||||
// later
|
||||
writeInfos(perFieldTermVectorInput);
|
||||
return spare;
|
||||
return spare.get();
|
||||
|
||||
} else {
|
||||
return null;
|
||||
|
@ -272,13 +270,11 @@ public final class TermVectorFields extends Fields {
|
|||
if (hasPayloads) {
|
||||
int payloadLength = input.readVInt();
|
||||
if (payloads[i] == null) {
|
||||
payloads[i] = new BytesRef(payloadLength);
|
||||
} else {
|
||||
payloads[i].grow(payloadLength);
|
||||
payloads[i] = new BytesRefBuilder();
|
||||
}
|
||||
input.readBytes(payloads[i].bytes, 0, payloadLength);
|
||||
payloads[i].length = payloadLength;
|
||||
payloads[i].offset = 0;
|
||||
payloads[i].grow(payloadLength);
|
||||
input.readBytes(payloads[i].bytes(), 0, payloadLength);
|
||||
payloads[i].setLength(payloadLength);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -293,9 +289,7 @@ public final class TermVectorFields extends Fields {
|
|||
}
|
||||
if (hasPayloads) {
|
||||
if (payloads.length < freq) {
|
||||
final BytesRef[] newArray = new BytesRef[ArrayUtil.oversize(freq, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
|
||||
System.arraycopy(payloads, 0, newArray, 0, payloads.length);
|
||||
payloads = newArray;
|
||||
payloads = Arrays.copyOf(payloads, ArrayUtil.oversize(freq, RamUsageEstimator.NUM_BYTES_OBJECT_REF));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -317,7 +311,7 @@ public final class TermVectorFields extends Fields {
|
|||
|
||||
@Override
|
||||
public BytesRef term() throws IOException {
|
||||
return spare;
|
||||
return spare.get();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -406,10 +400,10 @@ public final class TermVectorFields extends Fields {
|
|||
private int freq;
|
||||
private int[] startOffsets;
|
||||
private int[] positions;
|
||||
private BytesRef[] payloads;
|
||||
private BytesRefBuilder[] payloads;
|
||||
private int[] endOffsets;
|
||||
|
||||
private DocsAndPositionsEnum reset(int[] positions, int[] startOffsets, int[] endOffsets, BytesRef[] payloads, int freq) {
|
||||
private DocsAndPositionsEnum reset(int[] positions, int[] startOffsets, int[] endOffsets, BytesRefBuilder[] payloads, int freq) {
|
||||
curPos = -1;
|
||||
doc = -1;
|
||||
this.hasPositions = positions != null;
|
||||
|
@ -468,7 +462,13 @@ public final class TermVectorFields extends Fields {
|
|||
@Override
|
||||
public BytesRef getPayload() throws IOException {
|
||||
assert curPos < freq && curPos >= 0;
|
||||
return hasPayloads ? payloads[curPos] : null;
|
||||
if (hasPayloads) {
|
||||
final BytesRefBuilder payload = payloads[curPos];
|
||||
if (payload != null) {
|
||||
return payload.get();
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -26,8 +26,7 @@ import org.apache.lucene.index.Terms;
|
|||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.CharsRef;
|
||||
import org.apache.lucene.util.UnicodeUtil;
|
||||
import org.apache.lucene.util.CharsRefBuilder;
|
||||
import org.elasticsearch.ElasticsearchIllegalStateException;
|
||||
import org.elasticsearch.action.ActionResponse;
|
||||
import org.elasticsearch.action.termvector.TermVectorRequest.Flag;
|
||||
|
@ -175,7 +174,7 @@ public class TermVectorResponse extends ActionResponse implements ToXContent {
|
|||
return builder;
|
||||
}
|
||||
builder.startObject(FieldStrings.TERM_VECTORS);
|
||||
final CharsRef spare = new CharsRef();
|
||||
final CharsRefBuilder spare = new CharsRefBuilder();
|
||||
Fields theFields = getFields();
|
||||
Iterator<String> fieldIter = theFields.iterator();
|
||||
while (fieldIter.hasNext()) {
|
||||
|
@ -185,7 +184,7 @@ public class TermVectorResponse extends ActionResponse implements ToXContent {
|
|||
return builder;
|
||||
}
|
||||
|
||||
private void buildField(XContentBuilder builder, final CharsRef spare, Fields theFields, Iterator<String> fieldIter) throws IOException {
|
||||
private void buildField(XContentBuilder builder, final CharsRefBuilder spare, Fields theFields, Iterator<String> fieldIter) throws IOException {
|
||||
String fieldName = fieldIter.next();
|
||||
builder.startObject(fieldName);
|
||||
Terms curTerms = theFields.terms(fieldName);
|
||||
|
@ -200,10 +199,10 @@ public class TermVectorResponse extends ActionResponse implements ToXContent {
|
|||
builder.endObject();
|
||||
}
|
||||
|
||||
private void buildTerm(XContentBuilder builder, final CharsRef spare, Terms curTerms, TermsEnum termIter) throws IOException {
|
||||
private void buildTerm(XContentBuilder builder, final CharsRefBuilder spare, Terms curTerms, TermsEnum termIter) throws IOException {
|
||||
// start term, optimized writing
|
||||
BytesRef term = termIter.next();
|
||||
UnicodeUtil.UTF8toUTF16(term, spare);
|
||||
spare.copyUTF8Bytes(term);
|
||||
builder.startObject(spare.toString());
|
||||
buildTermStatistics(builder, termIter);
|
||||
// finally write the term vectors
|
||||
|
|
|
@ -21,10 +21,7 @@ package org.elasticsearch.common;
|
|||
|
||||
import com.google.common.collect.ImmutableSet;
|
||||
import com.google.common.collect.Iterables;
|
||||
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.UnicodeUtil;
|
||||
import org.elasticsearch.ElasticsearchIllegalStateException;
|
||||
import org.apache.lucene.util.BytesRefBuilder;
|
||||
import org.elasticsearch.common.bytes.BytesReference;
|
||||
import org.elasticsearch.common.io.FastStringReader;
|
||||
import org.elasticsearch.common.util.CollectionUtils;
|
||||
|
@ -1014,14 +1011,12 @@ public class Strings {
|
|||
}
|
||||
|
||||
public static byte[] toUTF8Bytes(CharSequence charSequence) {
|
||||
return toUTF8Bytes(charSequence, new BytesRef());
|
||||
return toUTF8Bytes(charSequence, new BytesRefBuilder());
|
||||
}
|
||||
|
||||
public static byte[] toUTF8Bytes(CharSequence charSequence, BytesRef spare) {
|
||||
UnicodeUtil.UTF16toUTF8(charSequence, 0, charSequence.length(), spare);
|
||||
final byte[] bytes = new byte[spare.length];
|
||||
System.arraycopy(spare.bytes, spare.offset, bytes, 0, bytes.length);
|
||||
return bytes;
|
||||
public static byte[] toUTF8Bytes(CharSequence charSequence, BytesRefBuilder spare) {
|
||||
spare.copyChars(charSequence);
|
||||
return Arrays.copyOf(spare.bytes(), spare.length());
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -21,7 +21,6 @@ package org.elasticsearch.common.bytes;
|
|||
|
||||
import com.google.common.base.Charsets;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.UnicodeUtil;
|
||||
import org.elasticsearch.ElasticsearchIllegalArgumentException;
|
||||
import org.elasticsearch.common.io.Channels;
|
||||
import org.elasticsearch.common.io.stream.BytesStreamInput;
|
||||
|
@ -43,8 +42,7 @@ public class BytesArray implements BytesReference {
|
|||
private int length;
|
||||
|
||||
public BytesArray(String bytes) {
|
||||
BytesRef bytesRef = new BytesRef();
|
||||
UnicodeUtil.UTF16toUTF8(bytes, 0, bytes.length(), bytesRef);
|
||||
BytesRef bytesRef = new BytesRef(bytes);
|
||||
this.bytes = bytesRef.bytes;
|
||||
this.offset = bytesRef.offset;
|
||||
this.length = bytesRef.length;
|
||||
|
|
|
@ -20,8 +20,7 @@
|
|||
package org.elasticsearch.common.bytes;
|
||||
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.CharsRef;
|
||||
import org.apache.lucene.util.UnicodeUtil;
|
||||
import org.apache.lucene.util.CharsRefBuilder;
|
||||
import org.elasticsearch.ElasticsearchIllegalArgumentException;
|
||||
import org.elasticsearch.common.io.Channels;
|
||||
import org.elasticsearch.common.io.stream.StreamInput;
|
||||
|
@ -274,8 +273,8 @@ public class PagedBytesReference implements BytesReference {
|
|||
}
|
||||
|
||||
byte[] bytes = toBytes();
|
||||
final CharsRef ref = new CharsRef(length);
|
||||
UnicodeUtil.UTF8toUTF16(bytes, offset, length, ref);
|
||||
final CharsRefBuilder ref = new CharsRefBuilder();
|
||||
ref.copyUTF8Bytes(bytes, offset, length);
|
||||
return ref.toString();
|
||||
}
|
||||
|
||||
|
|
|
@ -20,7 +20,7 @@
|
|||
package org.elasticsearch.common.io.stream;
|
||||
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.CharsRef;
|
||||
import org.apache.lucene.util.CharsRefBuilder;
|
||||
import org.elasticsearch.Version;
|
||||
import org.elasticsearch.common.Nullable;
|
||||
import org.elasticsearch.common.Strings;
|
||||
|
@ -254,15 +254,14 @@ public abstract class StreamInput extends InputStream {
|
|||
return null;
|
||||
}
|
||||
|
||||
private final CharsRef spare = new CharsRef();
|
||||
private final CharsRefBuilder spare = new CharsRefBuilder();
|
||||
|
||||
public String readString() throws IOException {
|
||||
final int charCount = readVInt();
|
||||
spare.offset = 0;
|
||||
spare.length = 0;
|
||||
spare.clear();
|
||||
spare.grow(charCount);
|
||||
int c = 0;
|
||||
while (spare.length < charCount) {
|
||||
while (spare.length() < charCount) {
|
||||
c = readByte() & 0xff;
|
||||
switch (c >> 4) {
|
||||
case 0:
|
||||
|
@ -273,14 +272,14 @@ public abstract class StreamInput extends InputStream {
|
|||
case 5:
|
||||
case 6:
|
||||
case 7:
|
||||
spare.chars[spare.length++] = (char) c;
|
||||
spare.append((char) c);
|
||||
break;
|
||||
case 12:
|
||||
case 13:
|
||||
spare.chars[spare.length++] = (char) ((c & 0x1F) << 6 | readByte() & 0x3F);
|
||||
spare.append((char) ((c & 0x1F) << 6 | readByte() & 0x3F));
|
||||
break;
|
||||
case 14:
|
||||
spare.chars[spare.length++] = (char) ((c & 0x0F) << 12 | (readByte() & 0x3F) << 6 | (readByte() & 0x3F) << 0);
|
||||
spare.append((char) ((c & 0x0F) << 12 | (readByte() & 0x3F) << 6 | (readByte() & 0x3F) << 0));
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -20,17 +20,15 @@
|
|||
package org.elasticsearch.common.io.stream;
|
||||
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.UnicodeUtil;
|
||||
import org.apache.lucene.util.BytesRefBuilder;
|
||||
import org.elasticsearch.Version;
|
||||
import org.elasticsearch.common.Nullable;
|
||||
import org.elasticsearch.common.bytes.BytesReference;
|
||||
import org.elasticsearch.common.io.UTF8StreamWriter;
|
||||
import org.elasticsearch.common.text.Text;
|
||||
import org.joda.time.ReadableInstant;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStream;
|
||||
import java.lang.ref.SoftReference;
|
||||
import java.util.Date;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.List;
|
||||
|
@@ -195,14 +193,14 @@ public abstract class StreamOutput extends OutputStream {
}
}

private final BytesRef spare = new BytesRef();
private final BytesRefBuilder spare = new BytesRefBuilder();

public void writeText(Text text) throws IOException {
if (!text.hasBytes()) {
final String string = text.string();
UnicodeUtil.UTF16toUTF8(string, 0, string.length(), spare);
writeInt(spare.length);
write(spare.bytes, spare.offset, spare.length);
spare.copyChars(string);
writeInt(spare.length());
write(spare.bytes(), 0, spare.length());
} else {
BytesReference bytes = text.bytes();
writeInt(bytes.length());
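A compilable sketch of the copyChars() pattern above, with a plain ByteArrayOutputStream standing in for StreamOutput (an assumption for illustration): after copyChars() the builder's data always starts at offset 0, which is why the write call no longer passes spare.offset.

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import org.apache.lucene.util.BytesRefBuilder;

public class WriteTextSketch {
    public static void main(String[] args) throws IOException {
        BytesRefBuilder spare = new BytesRefBuilder();
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        String string = "some text value";
        spare.copyChars(string);                     // UTF-16 -> UTF-8 into the builder
        out.write(spare.bytes(), 0, spare.length()); // builder data always starts at offset 0
        System.out.println(out.size() + " bytes written");
    }
}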
@ -20,6 +20,7 @@
|
|||
package org.elasticsearch.common.lucene;
|
||||
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BytesRefBuilder;
|
||||
|
||||
/**
|
||||
*/
|
||||
|
@ -52,7 +53,7 @@ public class BytesRefs {
|
|||
return new BytesRef(value.toString());
|
||||
}
|
||||
|
||||
public static BytesRef toBytesRef(Object value, BytesRef spare) {
|
||||
public static BytesRef toBytesRef(Object value, BytesRefBuilder spare) {
|
||||
if (value == null) {
|
||||
return null;
|
||||
}
|
||||
|
@ -60,6 +61,6 @@ public class BytesRefs {
|
|||
return (BytesRef) value;
|
||||
}
|
||||
spare.copyChars(value.toString());
|
||||
return spare;
|
||||
return spare.get();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -79,8 +79,7 @@ public class HashedBytesRef {
|
|||
}
|
||||
|
||||
public static HashedBytesRef deepCopyOf(HashedBytesRef other) {
|
||||
BytesRef copy = new BytesRef();
|
||||
copy.copyBytes(other.bytes);
|
||||
BytesRef copy = BytesRef.deepCopyOf(other.bytes);
|
||||
return new HashedBytesRef(copy, other.hash);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -51,15 +51,14 @@ import static org.elasticsearch.common.lucene.search.NoopCollector.NOOP_COLLECTO
|
|||
*/
|
||||
public class Lucene {
|
||||
|
||||
public static final Version VERSION = Version.LUCENE_4_9;
|
||||
// TODO: remove VERSION, and have users use Version.LATEST.
|
||||
public static final Version VERSION = Version.LATEST;
|
||||
public static final Version ANALYZER_VERSION = VERSION;
|
||||
public static final Version QUERYPARSER_VERSION = VERSION;
|
||||
|
||||
public static final NamedAnalyzer STANDARD_ANALYZER = new NamedAnalyzer("_standard", AnalyzerScope.GLOBAL, new StandardAnalyzer(ANALYZER_VERSION));
|
||||
public static final NamedAnalyzer KEYWORD_ANALYZER = new NamedAnalyzer("_keyword", AnalyzerScope.GLOBAL, new KeywordAnalyzer());
|
||||
|
||||
public static final int NO_DOC = -1;
|
||||
|
||||
public static final ScoreDoc[] EMPTY_SCORE_DOCS = new ScoreDoc[0];
|
||||
|
||||
public static final TopDocs EMPTY_TOP_DOCS = new TopDocs(0, EMPTY_SCORE_DOCS, 0.0f);
|
||||
|
@ -69,27 +68,11 @@ public class Lucene {
|
|||
if (version == null) {
|
||||
return defaultVersion;
|
||||
}
|
||||
switch(version) {
|
||||
case "4.9": return VERSION.LUCENE_4_9;
|
||||
case "4.8": return VERSION.LUCENE_4_8;
|
||||
case "4.7": return VERSION.LUCENE_4_7;
|
||||
case "4.6": return VERSION.LUCENE_4_6;
|
||||
case "4.5": return VERSION.LUCENE_4_5;
|
||||
case "4.4": return VERSION.LUCENE_4_4;
|
||||
case "4.3": return VERSION.LUCENE_4_3;
|
||||
case "4.2": return VERSION.LUCENE_4_2;
|
||||
case "4.1": return VERSION.LUCENE_4_1;
|
||||
case "4.0": return VERSION.LUCENE_4_0;
|
||||
case "3.6": return VERSION.LUCENE_3_6;
|
||||
case "3.5": return VERSION.LUCENE_3_5;
|
||||
case "3.4": return VERSION.LUCENE_3_4;
|
||||
case "3.3": return VERSION.LUCENE_3_3;
|
||||
case "3.2": return VERSION.LUCENE_3_2;
|
||||
case "3.1": return VERSION.LUCENE_3_1;
|
||||
case "3.0": return VERSION.LUCENE_3_0;
|
||||
default:
|
||||
logger.warn("no version match {}, default to {}", version, defaultVersion);
|
||||
return defaultVersion;
|
||||
try {
|
||||
return Version.parse(version);
|
||||
} catch (IllegalArgumentException e) {
|
||||
logger.warn("no version match {}, default to {}", version, defaultVersion, e);
|
||||
return defaultVersion;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -580,10 +563,7 @@ public class Lucene {
|
|||
try {
|
||||
return Version.parseLeniently(toParse);
|
||||
} catch (IllegalArgumentException e) {
|
||||
final String parsedMatchVersion = toParse
|
||||
.toUpperCase(Locale.ROOT)
|
||||
.replaceFirst("^(\\d+)\\.(\\d+)(.(\\d+))+$", "LUCENE_$1_$2");
|
||||
return Version.valueOf(parsedMatchVersion);
|
||||
// pass to default
|
||||
}
|
||||
}
|
||||
return defaultValue;
|
||||
|
|
|
@ -46,9 +46,8 @@ import org.apache.lucene.search.TermQuery;
|
|||
import org.apache.lucene.search.similarities.DefaultSimilarity;
|
||||
import org.apache.lucene.search.similarities.TFIDFSimilarity;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.CharsRef;
|
||||
import org.apache.lucene.util.CharsRefBuilder;
|
||||
import org.apache.lucene.util.PriorityQueue;
|
||||
import org.apache.lucene.util.UnicodeUtil;
|
||||
import org.elasticsearch.common.io.FastStringReader;
|
||||
|
||||
import java.io.IOException;
|
||||
|
@ -804,10 +803,10 @@ public final class XMoreLikeThis {
|
|||
*/
|
||||
private void addTermFrequencies(Map<String, Int> termFreqMap, Terms vector) throws IOException {
|
||||
final TermsEnum termsEnum = vector.iterator(null);
|
||||
final CharsRef spare = new CharsRef();
|
||||
final CharsRefBuilder spare = new CharsRefBuilder();
|
||||
BytesRef text;
|
||||
while((text = termsEnum.next()) != null) {
|
||||
UnicodeUtil.UTF8toUTF16(text, spare);
|
||||
spare.copyUTF8Bytes(text);
|
||||
final String term = spare.toString();
|
||||
if (isNoiseWord(term)) {
|
||||
continue;
|
||||
|
|
|
@ -305,10 +305,10 @@ public enum CollectionUtils {
|
|||
|
||||
};
|
||||
public static void sort(final BytesRefArray bytes, final int[] indices) {
|
||||
sort(new BytesRef(), new BytesRef(), bytes, indices);
|
||||
sort(new BytesRefBuilder(), new BytesRefBuilder(), bytes, indices);
|
||||
}
|
||||
|
||||
private static void sort(final BytesRef scratch, final BytesRef scratch1, final BytesRefArray bytes, final int[] indices) {
|
||||
private static void sort(final BytesRefBuilder scratch, final BytesRefBuilder scratch1, final BytesRefArray bytes, final int[] indices) {
|
||||
|
||||
final int numValues = bytes.size();
|
||||
assert indices.length >= numValues;
|
||||
|
@ -332,8 +332,8 @@ public enum CollectionUtils {
|
|||
}
|
||||
|
||||
public static int sortAndDedup(final BytesRefArray bytes, final int[] indices) {
|
||||
final BytesRef scratch = new BytesRef();
|
||||
final BytesRef scratch1 = new BytesRef();
|
||||
final BytesRefBuilder scratch = new BytesRefBuilder();
|
||||
final BytesRefBuilder scratch1 = new BytesRefBuilder();
|
||||
final int numValues = bytes.size();
|
||||
assert indices.length >= numValues;
|
||||
if (numValues <= 1) {
|
||||
|
@ -341,15 +341,15 @@ public enum CollectionUtils {
|
|||
}
|
||||
sort(scratch, scratch1, bytes, indices);
|
||||
int uniqueCount = 1;
|
||||
BytesRef previous = scratch;
|
||||
BytesRef current = scratch1;
|
||||
BytesRefBuilder previous = scratch;
|
||||
BytesRefBuilder current = scratch1;
|
||||
bytes.get(previous, indices[0]);
|
||||
for (int i = 1; i < numValues; ++i) {
|
||||
bytes.get(current, indices[i]);
|
||||
if (!previous.equals(current)) {
|
||||
if (!previous.get().equals(current.get())) {
|
||||
indices[uniqueCount++] = indices[i];
|
||||
}
|
||||
BytesRef tmp = previous;
|
||||
BytesRefBuilder tmp = previous;
|
||||
previous = current;
|
||||
current = tmp;
|
||||
}
|
||||
|
|
|
@ -23,12 +23,12 @@ import com.fasterxml.jackson.core.JsonParser;
|
|||
import com.fasterxml.jackson.core.JsonToken;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.UnicodeUtil;
|
||||
import org.elasticsearch.ElasticsearchIllegalStateException;
|
||||
import org.elasticsearch.common.xcontent.XContentType;
|
||||
import org.elasticsearch.common.xcontent.support.AbstractXContentParser;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.CharBuffer;
|
||||
|
||||
/**
|
||||
*
|
||||
|
@ -88,9 +88,7 @@ public class JsonXContentParser extends AbstractXContentParser {
|
|||
|
||||
@Override
|
||||
public BytesRef utf8Bytes() throws IOException {
|
||||
BytesRef bytes = new BytesRef();
|
||||
UnicodeUtil.UTF16toUTF8(parser.getTextCharacters(), parser.getTextOffset(), parser.getTextLength(), bytes);
|
||||
return bytes;
|
||||
return new BytesRef(CharBuffer.wrap(parser.getTextCharacters(), parser.getTextOffset(), parser.getTextLength()));
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -21,7 +21,6 @@ package org.elasticsearch.http.netty;
|
|||
|
||||
import com.google.common.base.Strings;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.UnicodeUtil;
|
||||
import org.elasticsearch.common.bytes.BytesReference;
|
||||
import org.elasticsearch.common.io.stream.BytesStreamOutput;
|
||||
import org.elasticsearch.common.io.stream.ReleasableBytesStreamOutput;
|
||||
|
@ -54,8 +53,7 @@ public class NettyHttpChannel extends HttpChannel {
|
|||
private static final ChannelBuffer END_JSONP;
|
||||
|
||||
static {
|
||||
BytesRef U_END_JSONP = new BytesRef();
|
||||
UnicodeUtil.UTF16toUTF8(");", 0, ");".length(), U_END_JSONP);
|
||||
BytesRef U_END_JSONP = new BytesRef(");");
|
||||
END_JSONP = ChannelBuffers.wrappedBuffer(U_END_JSONP.bytes, U_END_JSONP.offset, U_END_JSONP.length);
|
||||
}
|
||||
|
||||
|
@ -147,8 +145,7 @@ public class NettyHttpChannel extends HttpChannel {
|
|||
// handle JSONP
|
||||
String callback = request.param("callback");
|
||||
if (callback != null) {
|
||||
final BytesRef callbackBytes = new BytesRef(callback.length() * 4 + 1);
|
||||
UnicodeUtil.UTF16toUTF8(callback, 0, callback.length(), callbackBytes);
|
||||
final BytesRef callbackBytes = new BytesRef(callback);
|
||||
callbackBytes.bytes[callbackBytes.length] = '(';
|
||||
callbackBytes.length++;
|
||||
buffer = ChannelBuffers.wrappedBuffer(
|
||||
|
|
|
@@ -20,18 +20,19 @@
package org.elasticsearch.index.analysis;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.SimpleAnalyzerWrapper;
import org.apache.lucene.analysis.DelegatingAnalyzerWrapper;
import org.elasticsearch.common.collect.UpdateInPlaceMap;

/**
*
*/
public final class FieldNameAnalyzer extends SimpleAnalyzerWrapper {
public final class FieldNameAnalyzer extends DelegatingAnalyzerWrapper {

private final UpdateInPlaceMap<String, Analyzer> analyzers;
private final Analyzer defaultAnalyzer;

public FieldNameAnalyzer(UpdateInPlaceMap<String, Analyzer> analyzers, Analyzer defaultAnalyzer) {
super(Analyzer.PER_FIELD_REUSE_STRATEGY);
this.analyzers = analyzers;
this.defaultAnalyzer = defaultAnalyzer;
}

@@ -20,13 +20,13 @@
package org.elasticsearch.index.analysis;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.SimpleAnalyzerWrapper;
import org.apache.lucene.analysis.DelegatingAnalyzerWrapper;

/**
* Named analyzer is an analyzer wrapper around an actual analyzer ({@link #analyzer} that is associated
* with a name ({@link #name()}.
*/
public class NamedAnalyzer extends SimpleAnalyzerWrapper {
public class NamedAnalyzer extends DelegatingAnalyzerWrapper {

private final String name;
private final AnalyzerScope scope;

@@ -46,6 +46,7 @@ public class NamedAnalyzer extends SimpleAnalyzerWrapper {
}

public NamedAnalyzer(String name, AnalyzerScope scope, Analyzer analyzer, int positionOffsetGap) {
super(ERROR_STRATEGY);
this.name = name;
this.scope = scope;
this.analyzer = analyzer;

@@ -90,4 +91,17 @@ public class NamedAnalyzer extends SimpleAnalyzerWrapper {
public String toString() {
return "analyzer name[" + name + "], analyzer [" + analyzer + "]";
}

/** It is an error if this is ever used, it means we screwed up! */
static final ReuseStrategy ERROR_STRATEGY = new Analyzer.ReuseStrategy() {
@Override
public TokenStreamComponents getReusableComponents(Analyzer a, String f) {
throw new IllegalStateException("NamedAnalyzer cannot be wrapped with a wrapper, only a delegator");
}

@Override
public void setReusableComponents(Analyzer a, String f, TokenStreamComponents c) {
throw new IllegalStateException("NamedAnalyzer cannot be wrapped with a wrapper, only a delegator");
}
};
}
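For context, a minimal sketch (not part of this commit) of a DelegatingAnalyzerWrapper subclass in the spirit of the FieldNameAnalyzer/NamedAnalyzer changes above; the class name, map and field names are invented for illustration.

import java.util.Collections;
import java.util.Map;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.DelegatingAnalyzerWrapper;
import org.apache.lucene.analysis.core.KeywordAnalyzer;

public final class PerFieldDelegatingAnalyzer extends DelegatingAnalyzerWrapper {

    private final Map<String, Analyzer> analyzers;
    private final Analyzer defaultAnalyzer;

    public PerFieldDelegatingAnalyzer(Map<String, Analyzer> analyzers, Analyzer defaultAnalyzer) {
        // Thread-local TokenStreamComponents stay with the delegated analyzers and are
        // not duplicated on this wrapper, which is the point of DelegatingAnalyzerWrapper.
        super(Analyzer.PER_FIELD_REUSE_STRATEGY);
        this.analyzers = analyzers;
        this.defaultAnalyzer = defaultAnalyzer;
    }

    @Override
    protected Analyzer getWrappedAnalyzer(String fieldName) {
        Analyzer analyzer = analyzers.get(fieldName);
        return analyzer != null ? analyzer : defaultAnalyzer;
    }

    public static void main(String[] args) {
        Analyzer wrapper = new PerFieldDelegatingAnalyzer(
                Collections.<String, Analyzer>singletonMap("id", new KeywordAnalyzer()),
                new KeywordAnalyzer());
        System.out.println(wrapper.getClass().getSimpleName());
    }
}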
@ -44,18 +44,20 @@ public class StandardHtmlStripAnalyzer extends StopwordAnalyzerBase {
|
|||
super(version, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
|
||||
}
|
||||
|
||||
// TODO: add non Version based ctors?
|
||||
|
||||
public StandardHtmlStripAnalyzer(Version version, CharArraySet stopwords) {
|
||||
super(version, stopwords);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) {
|
||||
final StandardTokenizer src = new StandardTokenizer(matchVersion, reader);
|
||||
final StandardTokenizer src = new StandardTokenizer(getVersion(), reader);
|
||||
src.setMaxTokenLength(StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);
|
||||
TokenStream tok = new StandardFilter(matchVersion, src);
|
||||
tok = new LowerCaseFilter(matchVersion, tok);
|
||||
TokenStream tok = new StandardFilter(getVersion(), src);
|
||||
tok = new LowerCaseFilter(getVersion(), tok);
|
||||
if (!stopwords.isEmpty()) {
|
||||
tok = new StopFilter(matchVersion, tok, stopwords);
|
||||
tok = new StopFilter(getVersion(), tok, stopwords);
|
||||
}
|
||||
return new TokenStreamComponents(src, tok) {
|
||||
@Override
|
||||
|
|
|
@ -28,7 +28,7 @@ import org.apache.lucene.index.SegmentReader;
|
|||
import org.apache.lucene.search.DocIdSet;
|
||||
import org.apache.lucene.search.Filter;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BytesRefBuilder;
|
||||
import org.elasticsearch.ElasticsearchException;
|
||||
import org.elasticsearch.common.Nullable;
|
||||
import org.elasticsearch.common.Strings;
|
||||
|
@ -102,7 +102,7 @@ public class WeightedFilterCache extends AbstractIndexComponent implements Filte
|
|||
@Override
|
||||
public void clear(String reason, String[] keys) {
|
||||
logger.debug("clear keys [], reason [{}]", reason, keys);
|
||||
final BytesRef spare = new BytesRef();
|
||||
final BytesRefBuilder spare = new BytesRefBuilder();
|
||||
for (String key : keys) {
|
||||
final byte[] keyBytes = Strings.toUTF8Bytes(key, spare);
|
||||
for (Object readerKey : seenReaders.keySet()) {
|
||||
|
|
|
@ -21,7 +21,7 @@ package org.elasticsearch.index.codec;
|
|||
|
||||
import org.apache.lucene.codecs.DocValuesFormat;
|
||||
import org.apache.lucene.codecs.PostingsFormat;
|
||||
import org.apache.lucene.codecs.lucene49.Lucene49Codec;
|
||||
import org.apache.lucene.codecs.lucene410.Lucene410Codec;
|
||||
import org.elasticsearch.common.logging.ESLogger;
|
||||
import org.elasticsearch.index.codec.docvaluesformat.DocValuesFormatProvider;
|
||||
import org.elasticsearch.index.codec.postingsformat.PostingsFormatProvider;
|
||||
|
@ -37,7 +37,7 @@ import org.elasticsearch.index.mapper.MapperService;
|
|||
* configured for a specific field the default postings format is used.
|
||||
*/
|
||||
// LUCENE UPGRADE: make sure to move to a new codec depending on the lucene version
|
||||
public class PerFieldMappingPostingFormatCodec extends Lucene49Codec {
|
||||
public class PerFieldMappingPostingFormatCodec extends Lucene410Codec {
|
||||
private final ESLogger logger;
|
||||
private final MapperService mapperService;
|
||||
private final PostingsFormat defaultPostingFormat;
|
||||
|
|
|
@ -20,7 +20,7 @@
|
|||
package org.elasticsearch.index.codec.docvaluesformat;
|
||||
|
||||
import org.apache.lucene.codecs.DocValuesFormat;
|
||||
import org.apache.lucene.codecs.lucene49.Lucene49DocValuesFormat;
|
||||
import org.apache.lucene.codecs.lucene410.Lucene410DocValuesFormat;
|
||||
import org.elasticsearch.common.inject.Inject;
|
||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
|
@ -36,7 +36,7 @@ public class DiskDocValuesFormatProvider extends AbstractDocValuesFormatProvider
|
|||
public DiskDocValuesFormatProvider(@Assisted String name, @Assisted Settings docValuesFormatSettings) {
|
||||
super(name);
|
||||
// TODO: log a warning if someone chooses this? just remove this together and map it to the 4.9 provider?
|
||||
this.docValuesFormat = new Lucene49DocValuesFormat();
|
||||
this.docValuesFormat = new Lucene410DocValuesFormat();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -38,10 +38,10 @@ public class DocValuesFormats {
|
|||
builtInDocValuesFormatsX.put(name, new PreBuiltDocValuesFormatProvider.Factory(DocValuesFormat.forName(name)));
|
||||
}
|
||||
// LUCENE UPGRADE: update those DVF if necessary
|
||||
builtInDocValuesFormatsX.put(DocValuesFormatService.DEFAULT_FORMAT, new PreBuiltDocValuesFormatProvider.Factory(DocValuesFormatService.DEFAULT_FORMAT, DocValuesFormat.forName("Lucene49")));
|
||||
builtInDocValuesFormatsX.put(DocValuesFormatService.DEFAULT_FORMAT, new PreBuiltDocValuesFormatProvider.Factory(DocValuesFormatService.DEFAULT_FORMAT, DocValuesFormat.forName("Lucene410")));
|
||||
builtInDocValuesFormatsX.put("memory", new PreBuiltDocValuesFormatProvider.Factory("memory", DocValuesFormat.forName("Memory")));
|
||||
builtInDocValuesFormatsX.put("disk", new PreBuiltDocValuesFormatProvider.Factory("disk", DocValuesFormat.forName("Lucene49")));
|
||||
builtInDocValuesFormatsX.put("Disk", new PreBuiltDocValuesFormatProvider.Factory("Disk", DocValuesFormat.forName("Lucene49")));
|
||||
builtInDocValuesFormatsX.put("disk", new PreBuiltDocValuesFormatProvider.Factory("disk", DocValuesFormat.forName("Lucene410")));
|
||||
builtInDocValuesFormatsX.put("Disk", new PreBuiltDocValuesFormatProvider.Factory("Disk", DocValuesFormat.forName("Lucene410")));
|
||||
builtInDocValuesFormats = builtInDocValuesFormatsX.immutableMap();
|
||||
}
|
||||
|
||||
|
|
|
@ -23,6 +23,7 @@ import org.elasticsearch.common.Nullable;
|
|||
import org.elasticsearch.common.io.stream.StreamInput;
|
||||
import org.elasticsearch.common.io.stream.StreamOutput;
|
||||
import org.elasticsearch.common.io.stream.Streamable;
|
||||
import org.elasticsearch.common.lucene.Lucene;
|
||||
import org.elasticsearch.common.unit.ByteSizeValue;
|
||||
|
||||
import java.io.IOException;
|
||||
|
@ -36,7 +37,7 @@ public class Segment implements Streamable {
|
|||
public long sizeInBytes = -1;
|
||||
public int docCount = -1;
|
||||
public int delDocCount = -1;
|
||||
public String version = null;
|
||||
public org.apache.lucene.util.Version version = null;
|
||||
public Boolean compound = null;
|
||||
public String mergeId;
|
||||
public long memoryInBytes;
|
||||
|
@ -81,7 +82,7 @@ public class Segment implements Streamable {
|
|||
return this.sizeInBytes;
|
||||
}
|
||||
|
||||
public String getVersion() {
|
||||
public org.apache.lucene.util.Version getVersion() {
|
||||
return version;
|
||||
}
|
||||
|
||||
|
@ -138,7 +139,7 @@ public class Segment implements Streamable {
|
|||
docCount = in.readInt();
|
||||
delDocCount = in.readInt();
|
||||
sizeInBytes = in.readLong();
|
||||
version = in.readOptionalString();
|
||||
version = Lucene.parseVersionLenient(in.readOptionalString(), null);
|
||||
compound = in.readOptionalBoolean();
|
||||
mergeId = in.readOptionalString();
|
||||
memoryInBytes = in.readLong();
|
||||
|
@ -152,7 +153,7 @@ public class Segment implements Streamable {
|
|||
out.writeInt(docCount);
|
||||
out.writeInt(delDocCount);
|
||||
out.writeLong(sizeInBytes);
|
||||
out.writeOptionalString(version);
|
||||
out.writeOptionalString(version.toString());
|
||||
out.writeOptionalBoolean(compound);
|
||||
out.writeOptionalString(mergeId);
|
||||
out.writeLong(memoryInBytes);
|
||||
|
|
|
@ -1042,7 +1042,11 @@ public class InternalEngine extends AbstractIndexShardComponent implements Engin
|
|||
}
|
||||
// wait for the merges outside of the read lock
|
||||
if (optimize.waitForMerge()) {
|
||||
currentIndexWriter().waitForMerges();
|
||||
try {
|
||||
currentIndexWriter().waitForMerges();
|
||||
} catch (IOException e) {
|
||||
throw new OptimizeFailedEngineException(shardId, e);
|
||||
}
|
||||
}
|
||||
if (optimize.flush()) {
|
||||
flush(new Flush().force(true).waitIfOngoing(true));
|
||||
|
@ -1383,7 +1387,7 @@ public class InternalEngine extends AbstractIndexShardComponent implements Engin
|
|||
config.setIndexDeletionPolicy(deletionPolicy);
|
||||
config.setInfoStream(new LoggerInfoStream(indexSettings, shardId));
|
||||
config.setMergeScheduler(mergeScheduler.newMergeScheduler());
|
||||
MergePolicy mergePolicy = mergePolicyProvider.newMergePolicy();
|
||||
MergePolicy mergePolicy = mergePolicyProvider.getMergePolicy();
|
||||
// Give us the opportunity to upgrade old segments while performing
|
||||
// background merges
|
||||
mergePolicy = new ElasticsearchMergePolicy(mergePolicy);
|
||||
|
|
|
@@ -20,9 +20,11 @@
package org.elasticsearch.index.fielddata;

import org.apache.lucene.index.*;
import org.apache.lucene.util.*;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.NumericUtils;
import org.elasticsearch.common.geo.GeoPoint;
import org.elasticsearch.common.lucene.Lucene;

import java.util.ArrayList;
import java.util.List;

@@ -33,10 +35,6 @@ import java.util.List;
public enum FieldData {
;

static {
assert Lucene.VERSION == Version.LUCENE_4_9 : "Remove emptySortedNumeric in 4.10 and use the method with the same name from Lucene's DocValues class. See LUCENE-5834.";
}

/**
* Return a {@link SortedBinaryDocValues} that doesn't contain any value.
*/

@@ -44,13 +42,6 @@ public enum FieldData {
return singleton(DocValues.emptyBinary(), new Bits.MatchNoBits(maxDoc));
}

/**
* Return a {@link SortedNumericDocValues} that doesn't contain any value.
*/
public static SortedNumericDocValues emptySortedNumeric(int maxDoc) {
return DocValues.singleton(DocValues.emptyNumeric(), new Bits.MatchNoBits(maxDoc));
}

/**
* Return a {@link NumericDoubleValues} that doesn't contain any value.
*/
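As the removed assertion above anticipates (LUCENE-5834), Lucene 4.10 ships an equivalent factory on DocValues, so the Elasticsearch copy can go away. A minimal sketch, assuming an arbitrary maxDoc:

import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.SortedNumericDocValues;

public class EmptySortedNumericSketch {
    public static void main(String[] args) {
        int maxDoc = 10;
        // Provided by Lucene 4.10 directly; every document reports zero values.
        SortedNumericDocValues empty = DocValues.emptySortedNumeric(maxDoc);
        empty.setDocument(0);
        System.out.println(empty.count());   // 0
    }
}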
@ -23,8 +23,8 @@ import org.apache.lucene.index.AtomicReaderContext;
|
|||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.search.*;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BytesRefBuilder;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
import org.apache.lucene.util.UnicodeUtil;
|
||||
import org.elasticsearch.common.Nullable;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.index.Index;
|
||||
|
@ -115,9 +115,10 @@ public interface IndexFieldData<FD extends AtomicFieldData> extends IndexCompone
* since {@link Character#MAX_CODE_POINT} is a noncharacter and thus shouldn't appear in an index term. */
public static final BytesRef MAX_TERM;
static {
MAX_TERM = new BytesRef();
BytesRefBuilder builder = new BytesRefBuilder();
final char[] chars = Character.toChars(Character.MAX_CODE_POINT);
UnicodeUtil.UTF16toUTF8(chars, 0, chars.length, MAX_TERM);
builder.copyChars(chars, 0, chars.length);
MAX_TERM = builder.toBytesRef();
}
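
MAX_TERM is now assembled with a BytesRefBuilder and frozen via toBytesRef(), since Lucene 4.10 removes the UnicodeUtil overload that wrote UTF-8 into a mutable BytesRef. A small sketch of the same pattern; the class and method names are mine, purely illustrative.

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;

class Utf8TermExample {
    // Encode UTF-16 chars as an immutable UTF-8 BytesRef, as MAX_TERM is built above.
    static BytesRef toUtf8(char[] chars) {
        BytesRefBuilder builder = new BytesRefBuilder();
        builder.copyChars(chars, 0, chars.length); // UTF-16 -> UTF-8 into the builder
        return builder.toBytesRef();               // detached, immutable copy
    }
}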
/**
|
||||
|
|
|
@ -22,6 +22,7 @@ package org.elasticsearch.index.fielddata;
|
|||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BytesRefBuilder;
|
||||
import org.apache.lucene.util.NumericUtils;
|
||||
import org.elasticsearch.index.fielddata.ordinals.OrdinalsBuilder;
|
||||
|
||||
|
@ -37,7 +38,7 @@ public interface IndexNumericFieldData extends IndexFieldData<AtomicNumericField
|
|||
}
|
||||
|
||||
@Override
|
||||
public void toIndexForm(Number number, BytesRef bytes) {
|
||||
public void toIndexForm(Number number, BytesRefBuilder bytes) {
|
||||
INT.toIndexForm(number, bytes);
|
||||
}
|
||||
|
||||
|
@ -53,7 +54,7 @@ public interface IndexNumericFieldData extends IndexFieldData<AtomicNumericField
|
|||
}
|
||||
|
||||
@Override
|
||||
public void toIndexForm(Number number, BytesRef bytes) {
|
||||
public void toIndexForm(Number number, BytesRefBuilder bytes) {
|
||||
INT.toIndexForm(number, bytes);
|
||||
}
|
||||
|
||||
|
@ -69,7 +70,7 @@ public interface IndexNumericFieldData extends IndexFieldData<AtomicNumericField
|
|||
}
|
||||
|
||||
@Override
|
||||
public void toIndexForm(Number number, BytesRef bytes) {
|
||||
public void toIndexForm(Number number, BytesRefBuilder bytes) {
|
||||
NumericUtils.intToPrefixCodedBytes(number.intValue(), 0, bytes);
|
||||
}
|
||||
|
||||
|
@ -85,7 +86,7 @@ public interface IndexNumericFieldData extends IndexFieldData<AtomicNumericField
|
|||
}
|
||||
|
||||
@Override
|
||||
public void toIndexForm(Number number, BytesRef bytes) {
|
||||
public void toIndexForm(Number number, BytesRefBuilder bytes) {
|
||||
NumericUtils.longToPrefixCodedBytes(number.longValue(), 0, bytes);
|
||||
}
|
||||
|
||||
|
@ -101,7 +102,7 @@ public interface IndexNumericFieldData extends IndexFieldData<AtomicNumericField
|
|||
}
|
||||
|
||||
@Override
|
||||
public void toIndexForm(Number number, BytesRef bytes) {
|
||||
public void toIndexForm(Number number, BytesRefBuilder bytes) {
|
||||
NumericUtils.intToPrefixCodedBytes(NumericUtils.floatToSortableInt(number.floatValue()), 0, bytes);
|
||||
}
|
||||
|
||||
|
@ -117,7 +118,7 @@ public interface IndexNumericFieldData extends IndexFieldData<AtomicNumericField
|
|||
}
|
||||
|
||||
@Override
|
||||
public void toIndexForm(Number number, BytesRef bytes) {
|
||||
public void toIndexForm(Number number, BytesRefBuilder bytes) {
|
||||
NumericUtils.longToPrefixCodedBytes(NumericUtils.doubleToSortableLong(number.doubleValue()), 0, bytes);
|
||||
}
|
||||
|
||||
|
@ -160,7 +161,7 @@ public interface IndexNumericFieldData extends IndexFieldData<AtomicNumericField
return requiredBits;
}

public abstract void toIndexForm(Number number, BytesRef bytes);
public abstract void toIndexForm(Number number, BytesRefBuilder bytes);

public long toLong(BytesRef indexForm) {
return (long) toDouble(indexForm);
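
The abstract toIndexForm now takes a BytesRefBuilder because the 4.10 NumericUtils prefix-coding methods fill a builder rather than a BytesRef. A hedged sketch of producing an exact-match term under the new signatures, the same pattern the field mappers later in this commit use in indexedValueForSearch; the class and method names here are illustrative.

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.NumericUtils;

class PrefixCodedTermExample {
    // Encode a long as its indexed term form under the 4.10 API.
    static BytesRef exactMatchTerm(long value) {
        BytesRefBuilder bytes = new BytesRefBuilder();
        NumericUtils.longToPrefixCoded(value, 0, bytes); // shift 0 means exact match
        return bytes.get();                              // view over the builder's bytes
    }
}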
@ -29,11 +29,11 @@ import java.util.Arrays;
|
|||
public abstract class SortingBinaryDocValues extends SortedBinaryDocValues {
|
||||
|
||||
protected int count;
|
||||
protected BytesRef[] values;
|
||||
protected BytesRefBuilder[] values;
|
||||
private final Sorter sorter;
|
||||
|
||||
protected SortingBinaryDocValues() {
|
||||
values = new BytesRef[] { new BytesRef() };
|
||||
values = new BytesRefBuilder[] { new BytesRefBuilder() };
|
||||
sorter = new InPlaceMergeSorter() {
|
||||
|
||||
@Override
|
||||
|
@ -43,7 +43,7 @@ public abstract class SortingBinaryDocValues extends SortedBinaryDocValues {
|
|||
|
||||
@Override
|
||||
protected int compare(int i, int j) {
|
||||
return values[i].compareTo(values[j]);
|
||||
return values[i].get().compareTo(values[j].get());
|
||||
}
|
||||
};
|
||||
}
|
||||
|
@ -57,7 +57,7 @@ public abstract class SortingBinaryDocValues extends SortedBinaryDocValues {
|
|||
final int newLen = ArrayUtil.oversize(count, RamUsageEstimator.NUM_BYTES_OBJECT_REF);
|
||||
values = Arrays.copyOf(values, newLen);
|
||||
for (int i = oldLen; i < newLen; ++i) {
|
||||
values[i] = new BytesRef();
|
||||
values[i] = new BytesRefBuilder();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -77,6 +77,6 @@ public abstract class SortingBinaryDocValues extends SortedBinaryDocValues {
|
|||
|
||||
@Override
|
||||
public final BytesRef valueAt(int index) {
|
||||
return values[index];
|
||||
return values[index].get();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -23,15 +23,12 @@ import org.apache.lucene.index.AtomicReaderContext;
|
|||
import org.apache.lucene.index.BinaryDocValues;
|
||||
import org.apache.lucene.index.RandomAccessOrds;
|
||||
import org.apache.lucene.index.SortedDocValues;
|
||||
import org.apache.lucene.search.FieldCache;
|
||||
import org.apache.lucene.search.FieldComparator;
|
||||
import org.apache.lucene.search.Scorer;
|
||||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
import org.apache.lucene.util.Version;
|
||||
import org.elasticsearch.common.lucene.Lucene;
|
||||
import org.elasticsearch.index.fielddata.IndexFieldData;
|
||||
import org.elasticsearch.index.fielddata.IndexOrdinalsFieldData;
|
||||
import org.elasticsearch.index.fielddata.SortedBinaryDocValues;
|
||||
|
@ -110,7 +107,7 @@ public class BytesRefFieldComparatorSource extends IndexFieldData.XFieldComparat
|
|||
|
||||
final BytesRef nullPlaceHolder = new BytesRef();
|
||||
final BytesRef nonNullMissingBytes = missingBytes == null ? nullPlaceHolder : missingBytes;
|
||||
return new TermValComparator(numHits, null, sortMissingLast) {
|
||||
return new FieldComparator.TermValComparator(numHits, null, sortMissingLast) {
|
||||
|
||||
@Override
|
||||
protected BinaryDocValues getBinaryDocValues(AtomicReaderContext context, String field) throws IOException {
|
||||
|
@ -211,137 +208,4 @@ public class BytesRefFieldComparatorSource extends IndexFieldData.XFieldComparat
|
|||
|
||||
// we let termsenum etc fall back to the default implementation
|
||||
}
|
||||
|
||||
static {
|
||||
assert Lucene.VERSION == Version.LUCENE_4_9 : "The comparator below is a raw copy of Lucene's, remove it when upgrading to 4.10";
|
||||
}
|
||||
|
||||
/** Sorts by field's natural Term sort order. All
|
||||
* comparisons are done using BytesRef.compareTo, which is
|
||||
* slow for medium to large result sets but possibly
|
||||
* very fast for very small results sets. */
|
||||
public static class TermValComparator extends FieldComparator<BytesRef> {
|
||||
|
||||
private final BytesRef[] values;
|
||||
private final BytesRef[] tempBRs;
|
||||
private BinaryDocValues docTerms;
|
||||
private Bits docsWithField;
|
||||
private final String field;
|
||||
private BytesRef bottom;
|
||||
private BytesRef topValue;
|
||||
private final int missingSortCmp;
|
||||
|
||||
/** Sole constructor. */
|
||||
public TermValComparator(int numHits, String field, boolean sortMissingLast) {
|
||||
values = new BytesRef[numHits];
|
||||
tempBRs = new BytesRef[numHits];
|
||||
this.field = field;
|
||||
missingSortCmp = sortMissingLast ? 1 : -1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compare(int slot1, int slot2) {
|
||||
final BytesRef val1 = values[slot1];
|
||||
final BytesRef val2 = values[slot2];
|
||||
return compareValues(val1, val2);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareBottom(int doc) {
|
||||
final BytesRef comparableBytes = getComparableBytes(doc, docTerms.get(doc));
|
||||
return compareValues(bottom, comparableBytes);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void copy(int slot, int doc) {
|
||||
final BytesRef comparableBytes = getComparableBytes(doc, docTerms.get(doc));
|
||||
if (comparableBytes == null) {
|
||||
values[slot] = null;
|
||||
} else {
|
||||
if (tempBRs[slot] == null) {
|
||||
tempBRs[slot] = new BytesRef();
|
||||
}
|
||||
values[slot] = tempBRs[slot];
|
||||
values[slot].copyBytes(comparableBytes);
|
||||
}
|
||||
}
|
||||
|
||||
/** Retrieves the BinaryDocValues for the field in this segment */
|
||||
protected BinaryDocValues getBinaryDocValues(AtomicReaderContext context, String field) throws IOException {
|
||||
return FieldCache.DEFAULT.getTerms(context.reader(), field, true);
|
||||
}
|
||||
|
||||
/** Retrieves the set of documents that have a value in this segment */
|
||||
protected Bits getDocsWithField(AtomicReaderContext context, String field) throws IOException {
|
||||
return FieldCache.DEFAULT.getDocsWithField(context.reader(), field);
|
||||
}
|
||||
|
||||
/** Check whether the given value represents <tt>null</tt>. This can be
|
||||
* useful if the {@link BinaryDocValues} returned by {@link #getBinaryDocValues}
|
||||
* use a special value as a sentinel. The default implementation checks
|
||||
* {@link #getDocsWithField}.
|
||||
* <p>NOTE: The null value can only be an EMPTY {@link BytesRef}. */
|
||||
protected boolean isNull(int doc, BytesRef term) {
|
||||
return docsWithField != null && docsWithField.get(doc) == false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public FieldComparator<BytesRef> setNextReader(AtomicReaderContext context) throws IOException {
|
||||
docTerms = getBinaryDocValues(context, field);
|
||||
docsWithField = getDocsWithField(context, field);
|
||||
if (docsWithField instanceof Bits.MatchAllBits) {
|
||||
docsWithField = null;
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setBottom(final int bottom) {
|
||||
this.bottom = values[bottom];
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setTopValue(BytesRef value) {
|
||||
// null is fine: it means the last doc of the prior
|
||||
// search was missing this value
|
||||
topValue = value;
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef value(int slot) {
|
||||
return values[slot];
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareValues(BytesRef val1, BytesRef val2) {
|
||||
// missing always sorts first:
|
||||
if (val1 == null) {
|
||||
if (val2 == null) {
|
||||
return 0;
|
||||
}
|
||||
return missingSortCmp;
|
||||
} else if (val2 == null) {
|
||||
return -missingSortCmp;
|
||||
}
|
||||
return val1.compareTo(val2);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareTop(int doc) {
|
||||
final BytesRef comparableBytes = getComparableBytes(doc, docTerms.get(doc));
|
||||
return compareValues(topValue, comparableBytes);
|
||||
}
|
||||
|
||||
/**
|
||||
* Given a document and a term, return the term itself if it exists or
|
||||
* <tt>null</tt> otherwise.
|
||||
*/
|
||||
private BytesRef getComparableBytes(int doc, BytesRef term) {
|
||||
if (term.length == 0 && isNull(doc, term)) {
|
||||
return null;
|
||||
}
|
||||
return term;
}
}
}
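
The block above removes the private copy of Lucene's TermValComparator that was guarded by the LUCENE_4_9 assert; with 4.10 the earlier hunk simply subclasses FieldComparator.TermValComparator and overrides its hooks. A minimal sketch of that shape, with both hooks stubbed for illustration (the stub return values are not what the real code supplies).

import java.io.IOException;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.util.Bits;

class StubTermComparator extends FieldComparator.TermValComparator {
    StubTermComparator(int numHits, boolean sortMissingLast) {
        super(numHits, null, sortMissingLast); // null field: values come from the overrides
    }

    @Override
    protected BinaryDocValues getBinaryDocValues(AtomicReaderContext context, String field) throws IOException {
        return DocValues.emptyBinary(); // illustrative stub; real code plugs in field data here
    }

    @Override
    protected Bits getDocsWithField(AtomicReaderContext context, String field) throws IOException {
        return new Bits.MatchNoBits(context.reader().maxDoc()); // illustrative: treat all docs as missing
    }
}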
@ -19,8 +19,8 @@
|
|||
|
||||
package org.elasticsearch.index.fielddata.ordinals;
|
||||
|
||||
import org.apache.lucene.index.MultiDocValues.OrdinalMap;
|
||||
import org.apache.lucene.index.RandomAccessOrds;
|
||||
import org.apache.lucene.index.XOrdinalMap;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.LongValues;
|
||||
import org.elasticsearch.index.fielddata.AbstractRandomAccessOrds;
|
||||
|
@ -31,11 +31,11 @@ import org.elasticsearch.index.fielddata.AbstractRandomAccessOrds;
|
|||
public class GlobalOrdinalMapping extends AbstractRandomAccessOrds {
|
||||
|
||||
private final RandomAccessOrds values;
|
||||
private final XOrdinalMap ordinalMap;
|
||||
private final OrdinalMap ordinalMap;
|
||||
private final LongValues mapping;
|
||||
private final RandomAccessOrds[] bytesValues;
|
||||
|
||||
GlobalOrdinalMapping(XOrdinalMap ordinalMap, RandomAccessOrds[] bytesValues, int segmentIndex) {
|
||||
GlobalOrdinalMapping(OrdinalMap ordinalMap, RandomAccessOrds[] bytesValues, int segmentIndex) {
|
||||
super();
|
||||
this.values = bytesValues[segmentIndex];
|
||||
this.bytesValues = bytesValues;
|
||||
|
|
|
@ -20,8 +20,8 @@
|
|||
package org.elasticsearch.index.fielddata.ordinals;
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.MultiDocValues.OrdinalMap;
|
||||
import org.apache.lucene.index.RandomAccessOrds;
|
||||
import org.apache.lucene.index.XOrdinalMap;
|
||||
import org.apache.lucene.util.packed.PackedInts;
|
||||
import org.elasticsearch.common.breaker.CircuitBreaker;
|
||||
import org.elasticsearch.common.logging.ESLogger;
|
||||
|
@ -51,7 +51,7 @@ public enum GlobalOrdinalsBuilder {
atomicFD[i] = indexFieldData.load(indexReader.leaves().get(i));
subs[i] = atomicFD[i].getOrdinalsValues();
}
final XOrdinalMap ordinalMap = XOrdinalMap.build(null, subs, PackedInts.DEFAULT);
final OrdinalMap ordinalMap = OrdinalMap.build(null, subs, PackedInts.DEFAULT);
final long memorySizeInBytes = ordinalMap.ramBytesUsed();
breakerService.getBreaker(CircuitBreaker.Name.FIELDDATA).addWithoutBreaking(memorySizeInBytes);
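
The backported XOrdinalMap is gone: Lucene 4.10's MultiDocValues.OrdinalMap merges per-segment ordinals into one global ordinal space, and its ramBytesUsed() feeds the field-data circuit breaker. A sketch of the core call, assuming the per-segment doc values have already been collected; the class and method names are mine.

import java.io.IOException;
import org.apache.lucene.index.MultiDocValues.OrdinalMap;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.util.packed.PackedInts;

class GlobalOrdinalsExample {
    // Merge per-segment term ordinals (one SortedSetDocValues per segment) globally.
    static OrdinalMap build(SortedSetDocValues[] perSegment) throws IOException {
        OrdinalMap map = OrdinalMap.build(null, perSegment, PackedInts.DEFAULT);
        long heapBytes = map.ramBytesUsed(); // the amount accounted to the breaker above
        assert heapBytes >= 0;
        return map;
    }
}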
@ -19,8 +19,8 @@
|
|||
package org.elasticsearch.index.fielddata.ordinals;
|
||||
|
||||
import org.apache.lucene.index.AtomicReaderContext;
|
||||
import org.apache.lucene.index.MultiDocValues.OrdinalMap;
|
||||
import org.apache.lucene.index.RandomAccessOrds;
|
||||
import org.apache.lucene.index.XOrdinalMap;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.index.Index;
|
||||
import org.elasticsearch.index.fielddata.AtomicOrdinalsFieldData;
|
||||
|
@ -35,7 +35,7 @@ final class InternalGlobalOrdinalsIndexFieldData extends GlobalOrdinalsIndexFiel
|
|||
|
||||
private final Atomic[] atomicReaders;
|
||||
|
||||
InternalGlobalOrdinalsIndexFieldData(Index index, Settings settings, FieldMapper.Names fieldNames, FieldDataType fieldDataType, AtomicOrdinalsFieldData[] segmentAfd, XOrdinalMap ordinalMap, long memorySizeInBytes) {
|
||||
InternalGlobalOrdinalsIndexFieldData(Index index, Settings settings, FieldMapper.Names fieldNames, FieldDataType fieldDataType, AtomicOrdinalsFieldData[] segmentAfd, OrdinalMap ordinalMap, long memorySizeInBytes) {
|
||||
super(index, settings, fieldNames, fieldDataType, memorySizeInBytes);
|
||||
this.atomicReaders = new Atomic[segmentAfd.length];
|
||||
for (int i = 0; i < segmentAfd.length; i++) {
|
||||
|
@ -51,10 +51,10 @@ final class InternalGlobalOrdinalsIndexFieldData extends GlobalOrdinalsIndexFiel
|
|||
private final class Atomic extends AbstractAtomicOrdinalsFieldData {
|
||||
|
||||
private final AtomicOrdinalsFieldData afd;
|
||||
private final XOrdinalMap ordinalMap;
|
||||
private final OrdinalMap ordinalMap;
|
||||
private final int segmentIndex;
|
||||
|
||||
private Atomic(AtomicOrdinalsFieldData afd, XOrdinalMap ordinalMap, int segmentIndex) {
|
||||
private Atomic(AtomicOrdinalsFieldData afd, OrdinalMap ordinalMap, int segmentIndex) {
|
||||
this.afd = afd;
|
||||
this.ordinalMap = ordinalMap;
|
||||
this.segmentIndex = segmentIndex;
|
||||
|
|
|
@ -24,9 +24,8 @@ import org.apache.lucene.index.RandomAccessOrds;
|
|||
import org.apache.lucene.index.SortedDocValues;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.LongsRef;
|
||||
import org.apache.lucene.util.packed.AppendingPackedLongBuffer;
|
||||
import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer;
|
||||
import org.apache.lucene.util.packed.PackedInts;
|
||||
import org.apache.lucene.util.packed.PackedLongValues;
|
||||
import org.elasticsearch.index.fielddata.AbstractRandomAccessOrds;
|
||||
|
||||
/**
|
||||
|
@ -57,26 +56,26 @@ public class MultiOrdinals extends Ordinals {
|
|||
|
||||
private final boolean multiValued;
|
||||
private final long valueCount;
|
||||
private final MonotonicAppendingLongBuffer endOffsets;
|
||||
private final AppendingPackedLongBuffer ords;
|
||||
private final PackedLongValues endOffsets;
|
||||
private final PackedLongValues ords;
|
||||
|
||||
public MultiOrdinals(OrdinalsBuilder builder, float acceptableOverheadRatio) {
|
||||
multiValued = builder.getNumMultiValuesDocs() > 0;
|
||||
valueCount = builder.getValueCount();
|
||||
endOffsets = new MonotonicAppendingLongBuffer(OFFSET_INIT_PAGE_COUNT, OFFSETS_PAGE_SIZE, acceptableOverheadRatio);
|
||||
ords = new AppendingPackedLongBuffer(OFFSET_INIT_PAGE_COUNT, OFFSETS_PAGE_SIZE, acceptableOverheadRatio);
|
||||
PackedLongValues.Builder endOffsetsBuilder = PackedLongValues.monotonicBuilder(OFFSETS_PAGE_SIZE, acceptableOverheadRatio);
|
||||
PackedLongValues.Builder ordsBuilder = PackedLongValues.packedBuilder(OFFSETS_PAGE_SIZE, acceptableOverheadRatio);
|
||||
long lastEndOffset = 0;
|
||||
for (int i = 0; i < builder.maxDoc(); ++i) {
|
||||
final LongsRef docOrds = builder.docOrds(i);
|
||||
final long endOffset = lastEndOffset + docOrds.length;
|
||||
endOffsets.add(endOffset);
|
||||
endOffsetsBuilder.add(endOffset);
|
||||
for (int j = 0; j < docOrds.length; ++j) {
|
||||
ords.add(docOrds.longs[docOrds.offset + j]);
|
||||
ordsBuilder.add(docOrds.longs[docOrds.offset + j]);
|
||||
}
|
||||
lastEndOffset = endOffset;
}
endOffsets.freeze();
ords.freeze();
endOffsets = endOffsetsBuilder.build();
ords = ordsBuilder.build();
assert endOffsets.size() == builder.maxDoc();
assert ords.size() == builder.getTotalNumOrds() : ords.size() + " != " + builder.getTotalNumOrds();
}
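
MonotonicAppendingLongBuffer and AppendingPackedLongBuffer are replaced by the immutable PackedLongValues, so the constructor appends into builders and calls build() where it used to call freeze(). A short sketch of the builder round trip; the page size and sample values are arbitrary, chosen only for illustration.

import org.apache.lucene.util.packed.PackedInts;
import org.apache.lucene.util.packed.PackedLongValues;

class PackedLongValuesExample {
    static void roundTrip() {
        // Monotonic builder suits non-decreasing sequences such as the end offsets.
        PackedLongValues.Builder offsets = PackedLongValues.monotonicBuilder(PackedInts.COMPACT);
        // Plain packed builder holds arbitrary longs such as the flattened ordinals.
        PackedLongValues.Builder ords = PackedLongValues.packedBuilder(1024, PackedInts.COMPACT);
        for (long i = 0; i < 100; i++) {
            offsets.add(i * 3);
            ords.add(i % 7);
        }
        PackedLongValues frozenOffsets = offsets.build(); // immutable after build()
        PackedLongValues frozenOrds = ords.build();
        assert frozenOffsets.size() == 100;
        assert frozenOrds.get(8) == 1; // 8 % 7
    }
}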
@ -98,8 +97,8 @@ public class MultiOrdinals extends Ordinals {
|
|||
private static class SingleDocs extends SortedDocValues {
|
||||
|
||||
private final int valueCount;
|
||||
private final MonotonicAppendingLongBuffer endOffsets;
|
||||
private final AppendingPackedLongBuffer ords;
|
||||
private final PackedLongValues endOffsets;
|
||||
private final PackedLongValues ords;
|
||||
private final ValuesHolder values;
|
||||
|
||||
SingleDocs(MultiOrdinals ordinals, ValuesHolder values) {
|
||||
|
@ -131,8 +130,8 @@ public class MultiOrdinals extends Ordinals {
|
|||
private static class MultiDocs extends AbstractRandomAccessOrds {
|
||||
|
||||
private final long valueCount;
|
||||
private final MonotonicAppendingLongBuffer endOffsets;
|
||||
private final AppendingPackedLongBuffer ords;
|
||||
private final PackedLongValues endOffsets;
|
||||
private final PackedLongValues ords;
|
||||
private long offset;
|
||||
private int cardinality;
|
||||
private final ValuesHolder values;
|
||||
|
|
|
@ -21,8 +21,7 @@ package org.elasticsearch.index.fielddata.plain;
|
|||
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BytesRefIterator;
|
||||
import org.apache.lucene.util.CharsRef;
|
||||
import org.apache.lucene.util.UnicodeUtil;
|
||||
import org.apache.lucene.util.CharsRefBuilder;
|
||||
import org.elasticsearch.ElasticsearchIllegalArgumentException;
|
||||
import org.elasticsearch.common.Nullable;
|
||||
import org.elasticsearch.common.geo.GeoPoint;
|
||||
|
@ -41,12 +40,12 @@ abstract class AbstractIndexGeoPointFieldData extends AbstractIndexFieldData<Ato
|
|||
|
||||
private final BytesRefIterator termsEnum;
|
||||
private final GeoPoint next;
|
||||
private final CharsRef spare;
|
||||
private final CharsRefBuilder spare;
|
||||
|
||||
protected GeoPointEnum(BytesRefIterator termsEnum) {
|
||||
this.termsEnum = termsEnum;
|
||||
next = new GeoPoint();
|
||||
spare = new CharsRef();
|
||||
spare = new CharsRefBuilder();
|
||||
}
|
||||
|
||||
public GeoPoint next() throws IOException {
|
||||
|
@ -54,10 +53,10 @@ abstract class AbstractIndexGeoPointFieldData extends AbstractIndexFieldData<Ato
|
|||
if (term == null) {
|
||||
return null;
|
||||
}
|
||||
UnicodeUtil.UTF8toUTF16(term, spare);
|
||||
spare.copyUTF8Bytes(term);
|
||||
int commaIndex = -1;
|
||||
for (int i = 0; i < spare.length; i++) {
|
||||
if (spare.chars[spare.offset + i] == ',') { // saves a string creation
|
||||
for (int i = 0; i < spare.length(); i++) {
|
||||
if (spare.charAt(i) == ',') { // saves a string creation
|
||||
commaIndex = i;
|
||||
break;
|
||||
}
|
||||
|
@ -66,8 +65,8 @@ abstract class AbstractIndexGeoPointFieldData extends AbstractIndexFieldData<Ato
|
|||
assert false;
|
||||
return next.reset(0, 0);
|
||||
}
|
||||
final double lat = Double.parseDouble(new String(spare.chars, spare.offset, (commaIndex - spare.offset)));
|
||||
final double lon = Double.parseDouble(new String(spare.chars, (spare.offset + (commaIndex + 1)), spare.length - ((commaIndex + 1) - spare.offset)));
|
||||
final double lat = Double.parseDouble(new String(spare.chars(), 0, commaIndex));
|
||||
final double lon = Double.parseDouble(new String(spare.chars(), commaIndex + 1, spare.length() - (commaIndex + 1)));
|
||||
return next.reset(lat, lon);
|
||||
}
|
||||
|
||||
|
|
|
@ -20,8 +20,7 @@ package org.elasticsearch.index.fielddata.plain;
|
|||
|
||||
import org.apache.lucene.index.*;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.CharsRef;
|
||||
import org.apache.lucene.util.UnicodeUtil;
|
||||
import org.apache.lucene.util.CharsRefBuilder;
|
||||
import org.elasticsearch.ElasticsearchException;
|
||||
import org.elasticsearch.common.Nullable;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
|
@ -31,8 +30,8 @@ import org.elasticsearch.index.fielddata.IndexFieldData.XFieldComparatorSource.N
|
|||
import org.elasticsearch.index.fielddata.fieldcomparator.BytesRefFieldComparatorSource;
|
||||
import org.elasticsearch.index.fielddata.ordinals.GlobalOrdinalsBuilder;
|
||||
import org.elasticsearch.index.mapper.FieldMapper.Names;
|
||||
import org.elasticsearch.search.MultiValueMode;
|
||||
import org.elasticsearch.indices.breaker.CircuitBreakerService;
|
||||
import org.elasticsearch.search.MultiValueMode;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
|
@ -138,7 +137,7 @@ public abstract class AbstractIndexOrdinalsFieldData extends AbstractIndexFieldD
|
|||
private static final class RegexFilter extends FilteredTermsEnum {
|
||||
|
||||
private final Matcher matcher;
|
||||
private final CharsRef spare = new CharsRef();
|
||||
private final CharsRefBuilder spare = new CharsRefBuilder();
|
||||
|
||||
public RegexFilter(TermsEnum delegate, Matcher matcher) {
|
||||
super(delegate, false);
|
||||
|
@ -155,8 +154,8 @@ public abstract class AbstractIndexOrdinalsFieldData extends AbstractIndexFieldD
@Override
protected AcceptStatus accept(BytesRef arg0) throws IOException {
UnicodeUtil.UTF8toUTF16(arg0, spare);
matcher.reset(spare);
spare.copyUTF8Bytes(arg0);
matcher.reset(spare.get());
if (matcher.matches()) {
return AcceptStatus.YES;
}
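
CharsRef plus UnicodeUtil.UTF8toUTF16 gives way to CharsRefBuilder, whose copyUTF8Bytes grows the buffer and decodes in one step, and whose get() hands the matcher a CharSequence view. A compact sketch of that decode-and-match pattern; the class and method names are mine.

import java.util.regex.Pattern;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRefBuilder;

class Utf8RegexExample {
    // Decode a UTF-8 term and test it against a pattern without allocating a String.
    static boolean matches(BytesRef term, Pattern pattern) {
        CharsRefBuilder spare = new CharsRefBuilder();
        spare.copyUTF8Bytes(term);                     // grow + decode UTF-8 -> UTF-16
        return pattern.matcher(spare.get()).matches(); // CharsRef is a CharSequence
    }
}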
@ -19,6 +19,7 @@
|
|||
|
||||
package org.elasticsearch.index.fielddata.plain;
|
||||
|
||||
import org.apache.lucene.index.DocValues;
|
||||
import org.apache.lucene.index.SortedNumericDocValues;
|
||||
import org.elasticsearch.index.fielddata.*;
|
||||
|
||||
|
@ -59,7 +60,7 @@ abstract class AtomicLongFieldData implements AtomicNumericFieldData {
|
|||
|
||||
@Override
|
||||
public SortedNumericDocValues getLongValues() {
|
||||
return FieldData.emptySortedNumeric(maxDoc);
|
||||
return DocValues.emptySortedNumeric(maxDoc);
|
||||
}
|
||||
|
||||
};
|
||||
|
|
|
@ -23,6 +23,7 @@ import org.apache.lucene.index.BinaryDocValues;
|
|||
import org.apache.lucene.store.ByteArrayDataInput;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BytesRefBuilder;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
import org.elasticsearch.index.fielddata.AtomicFieldData;
|
||||
import org.elasticsearch.index.fielddata.ScriptDocValues;
|
||||
|
@ -49,7 +50,7 @@ final class BytesBinaryDVAtomicFieldData implements AtomicFieldData {
|
|||
return new SortedBinaryDocValues() {
|
||||
|
||||
int count;
|
||||
BytesRef[] refs = new BytesRef[0];
|
||||
BytesRefBuilder[] refs = new BytesRefBuilder[0];
|
||||
final ByteArrayDataInput in = new ByteArrayDataInput();
|
||||
|
||||
@Override
|
||||
|
@ -64,16 +65,15 @@ final class BytesBinaryDVAtomicFieldData implements AtomicFieldData {
|
|||
final int previousLength = refs.length;
|
||||
refs = Arrays.copyOf(refs, ArrayUtil.oversize(count, RamUsageEstimator.NUM_BYTES_OBJECT_REF));
|
||||
for (int i = previousLength; i < refs.length; ++i) {
|
||||
refs[i] = new BytesRef();
|
||||
refs[i] = new BytesRefBuilder();
|
||||
}
|
||||
}
for (int i = 0; i < count; ++i) {
final int length = in.readVInt();
final BytesRef scratch = refs[i];
final BytesRefBuilder scratch = refs[i];
scratch.grow(length);
in.readBytes(scratch.bytes, 0, length);
scratch.length = length;
scratch.offset = 0;
in.readBytes(scratch.bytes(), 0, length);
scratch.setLength(length);
}
}
}
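
Since BytesRef is no longer used as a growable buffer, the decode loop writes into a BytesRefBuilder: grow() reserves capacity, readBytes fills bytes(), and setLength() marks the valid prefix. A hedged sketch of that pattern for a single length-prefixed value; the helper name is illustrative.

import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;

class LengthPrefixedReadExample {
    // Read one vInt-length-prefixed value into a reusable builder.
    static BytesRef readValue(ByteArrayDataInput in, BytesRefBuilder scratch) {
        int length = in.readVInt();
        scratch.grow(length);                     // ensure capacity
        in.readBytes(scratch.bytes(), 0, length); // fill the backing array
        scratch.setLength(length);                // record how many bytes are valid
        return scratch.get();
    }
}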
@ -85,7 +85,7 @@ final class BytesBinaryDVAtomicFieldData implements AtomicFieldData {
|
|||
|
||||
@Override
|
||||
public BytesRef valueAt(int index) {
|
||||
return refs[index];
|
||||
return refs[index].get();
|
||||
}
|
||||
|
||||
};
|
||||
|
|
|
@ -21,7 +21,9 @@ package org.elasticsearch.index.fielddata.plain;
|
|||
import org.apache.lucene.index.RandomAccessOrds;
|
||||
import org.apache.lucene.index.SortedSetDocValues;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BytesRefBuilder;
|
||||
import org.apache.lucene.util.IntsRef;
|
||||
import org.apache.lucene.util.IntsRefBuilder;
|
||||
import org.apache.lucene.util.fst.FST;
|
||||
import org.apache.lucene.util.fst.FST.Arc;
|
||||
import org.apache.lucene.util.fst.FST.BytesReader;
|
||||
|
@ -71,15 +73,15 @@ public class FSTBytesAtomicFieldData extends AbstractAtomicOrdinalsFieldData {
|
|||
private final FST<Long> fst;
|
||||
|
||||
// per-thread resources
|
||||
private final BytesRef scratch;
|
||||
private final BytesRefBuilder scratch;
|
||||
protected final BytesReader in;
|
||||
protected final Arc<Long> firstArc = new Arc<>();
|
||||
protected final Arc<Long> scratchArc = new Arc<>();
|
||||
protected final IntsRef scratchInts = new IntsRef();
|
||||
protected final IntsRefBuilder scratchInts = new IntsRefBuilder();
|
||||
|
||||
ValuesHolder(FST<Long> fst) {
|
||||
this.fst = fst;
|
||||
scratch = new BytesRef();
|
||||
scratch = new BytesRefBuilder();
|
||||
in = fst.getBytesReader();
|
||||
}
|
||||
|
||||
|
@ -90,13 +92,13 @@ public class FSTBytesAtomicFieldData extends AbstractAtomicOrdinalsFieldData {
|
|||
fst.getFirstArc(firstArc);
|
||||
try {
|
||||
IntsRef output = Util.getByOutput(fst, ord, in, firstArc, scratchArc, scratchInts);
|
||||
scratch.length = scratch.offset = 0;
|
||||
scratch.clear();
|
||||
scratch.grow(output.length);
|
||||
Util.toBytesRef(output, scratch);
|
||||
} catch (IOException ex) {
|
||||
//bogus
|
||||
}
|
||||
return scratch;
|
||||
return scratch.get();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -20,7 +20,7 @@ package org.elasticsearch.index.fielddata.plain;
|
|||
|
||||
import org.apache.lucene.index.*;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.IntsRef;
|
||||
import org.apache.lucene.util.IntsRefBuilder;
|
||||
import org.apache.lucene.util.fst.FST;
|
||||
import org.apache.lucene.util.fst.FST.INPUT_TYPE;
|
||||
import org.apache.lucene.util.fst.PositiveIntOutputs;
|
||||
|
@ -72,7 +72,7 @@ public class FSTBytesIndexFieldData extends AbstractIndexOrdinalsFieldData {
|
|||
}
|
||||
PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
|
||||
org.apache.lucene.util.fst.Builder<Long> fstBuilder = new org.apache.lucene.util.fst.Builder<>(INPUT_TYPE.BYTE1, outputs);
|
||||
final IntsRef scratch = new IntsRef();
|
||||
final IntsRefBuilder scratch = new IntsRefBuilder();
|
||||
|
||||
final long numTerms;
|
||||
if (regex == null && frequency == null) {
|
||||
|
|
|
@ -22,9 +22,8 @@ package org.elasticsearch.index.fielddata.plain;
|
|||
import com.google.common.base.Preconditions;
|
||||
import org.apache.lucene.index.*;
|
||||
import org.apache.lucene.util.*;
|
||||
import org.apache.lucene.util.packed.AppendingDeltaPackedLongBuffer;
|
||||
import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer;
|
||||
import org.apache.lucene.util.packed.PackedInts;
|
||||
import org.apache.lucene.util.packed.PackedLongValues;
|
||||
import org.elasticsearch.ElasticsearchException;
|
||||
import org.elasticsearch.common.Nullable;
|
||||
import org.elasticsearch.common.breaker.CircuitBreaker;
|
||||
|
@ -97,7 +96,7 @@ public class PackedArrayIndexFieldData extends AbstractIndexFieldData<AtomicNume
|
|||
// TODO: how can we guess the number of terms? numerics end up creating more terms per value...
|
||||
// Lucene encodes numeric data so that the lexicographical (encoded) order matches the integer order so we know the sequence of
|
||||
// longs is going to be monotonically increasing
|
||||
final MonotonicAppendingLongBuffer values = new MonotonicAppendingLongBuffer();
|
||||
final PackedLongValues.Builder valuesBuilder = PackedLongValues.monotonicBuilder(PackedInts.COMPACT);
|
||||
|
||||
final float acceptableTransientOverheadRatio = fieldDataType.getSettings().getAsFloat("acceptable_transient_overhead_ratio", OrdinalsBuilder.DEFAULT_ACCEPTABLE_OVERHEAD_RATIO);
|
||||
TermsEnum termsEnum = estimator.beforeLoad(terms);
|
||||
|
@ -111,9 +110,9 @@ public class PackedArrayIndexFieldData extends AbstractIndexFieldData<AtomicNume
|
|||
final long value = indexedAsLong
|
||||
? NumericUtils.prefixCodedToLong(term)
|
||||
: NumericUtils.prefixCodedToInt(term);
|
||||
assert values.size() == 0 || value > values.get(values.size() - 1);
|
||||
values.add(value);
|
||||
valuesBuilder.add(value);
|
||||
}
|
||||
final PackedLongValues values = valuesBuilder.build();
|
||||
final Ordinals build = builder.build(fieldDataType.getSettings());
|
||||
CommonSettings.MemoryStorageFormat formatHint = CommonSettings.getMemoryStorageHint(fieldDataType);
|
||||
|
||||
|
@ -206,7 +205,7 @@ public class PackedArrayIndexFieldData extends AbstractIndexFieldData<AtomicNume
|
|||
};
|
||||
break;
|
||||
case PAGED:
|
||||
final AppendingDeltaPackedLongBuffer dpValues = new AppendingDeltaPackedLongBuffer(reader.maxDoc() / pageSize + 1, pageSize, acceptableOverheadRatio);
|
||||
final PackedLongValues.Builder dpValues = PackedLongValues.deltaPackedBuilder(pageSize, acceptableOverheadRatio);
|
||||
|
||||
long lastValue = 0;
|
||||
for (int i = 0; i < reader.maxDoc(); i++) {
|
||||
|
@ -217,13 +216,13 @@ public class PackedArrayIndexFieldData extends AbstractIndexFieldData<AtomicNume
|
|||
}
|
||||
dpValues.add(lastValue);
|
||||
}
|
||||
dpValues.freeze();
|
||||
ramBytesUsed = dpValues.ramBytesUsed();
|
||||
final PackedLongValues pagedValues = dpValues.build();
|
||||
data = new AtomicLongFieldData(ramBytesUsed) {
|
||||
|
||||
@Override
|
||||
public SortedNumericDocValues getLongValues() {
|
||||
return pagedSingles(dpValues, docsWithValues);
|
||||
return pagedSingles(pagedValues, docsWithValues);
|
||||
}
|
||||
|
||||
};
|
||||
|
@ -260,7 +259,7 @@ public class PackedArrayIndexFieldData extends AbstractIndexFieldData<AtomicNume
|
|||
|
||||
}
|
||||
|
||||
protected CommonSettings.MemoryStorageFormat chooseStorageFormat(AtomicReader reader, MonotonicAppendingLongBuffer values, Ordinals build, RandomAccessOrds ordinals,
|
||||
protected CommonSettings.MemoryStorageFormat chooseStorageFormat(AtomicReader reader, PackedLongValues values, Ordinals build, RandomAccessOrds ordinals,
|
||||
long minValue, long maxValue, float acceptableOverheadRatio, int pageSize) {
|
||||
|
||||
CommonSettings.MemoryStorageFormat format;
|
||||
|
@ -318,7 +317,7 @@ public class PackedArrayIndexFieldData extends AbstractIndexFieldData<AtomicNume
|
|||
return format;
|
||||
}
|
||||
|
||||
private long getPageMemoryUsage(MonotonicAppendingLongBuffer values, float acceptableOverheadRatio, int pageSize, long pageMinOrdinal, long pageMaxOrdinal) {
|
||||
private long getPageMemoryUsage(PackedLongValues values, float acceptableOverheadRatio, int pageSize, long pageMinOrdinal, long pageMaxOrdinal) {
|
||||
int bitsRequired;
|
||||
long pageMemorySize = 0;
|
||||
PackedInts.FormatAndBits formatAndBits;
|
||||
|
@ -484,7 +483,7 @@ public class PackedArrayIndexFieldData extends AbstractIndexFieldData<AtomicNume
|
|||
return DocValues.singleton(values, docsWithFields);
|
||||
}
|
||||
|
||||
private static SortedNumericDocValues pagedSingles(final AppendingDeltaPackedLongBuffer values, final FixedBitSet docsWithValue) {
|
||||
private static SortedNumericDocValues pagedSingles(final PackedLongValues values, final FixedBitSet docsWithValue) {
|
||||
return DocValues.singleton(new NumericDocValues() {
|
||||
// we need to wrap since NumericDocValues must return 0 when a doc has no value
|
||||
@Override
|
||||
|
|
|
@ -21,7 +21,7 @@ package org.elasticsearch.index.fielddata.plain;
|
|||
import org.apache.lucene.index.RandomAccessOrds;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.PagedBytes;
|
||||
import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer;
|
||||
import org.apache.lucene.util.packed.PackedLongValues;
|
||||
import org.elasticsearch.index.fielddata.ordinals.Ordinals;
|
||||
|
||||
/**
|
||||
|
@ -29,10 +29,10 @@ import org.elasticsearch.index.fielddata.ordinals.Ordinals;
|
|||
public class PagedBytesAtomicFieldData extends AbstractAtomicOrdinalsFieldData {
|
||||
|
||||
private final PagedBytes.Reader bytes;
|
||||
private final MonotonicAppendingLongBuffer termOrdToBytesOffset;
|
||||
private final PackedLongValues termOrdToBytesOffset;
|
||||
protected final Ordinals ordinals;
|
||||
|
||||
public PagedBytesAtomicFieldData(PagedBytes.Reader bytes, MonotonicAppendingLongBuffer termOrdToBytesOffset, Ordinals ordinals) {
|
||||
public PagedBytesAtomicFieldData(PagedBytes.Reader bytes, PackedLongValues termOrdToBytesOffset, Ordinals ordinals) {
|
||||
this.bytes = bytes;
|
||||
this.termOrdToBytesOffset = termOrdToBytesOffset;
|
||||
this.ordinals = ordinals;
|
||||
|
@ -61,9 +61,9 @@ public class PagedBytesAtomicFieldData extends AbstractAtomicOrdinalsFieldData {
|
|||
|
||||
private final BytesRef scratch = new BytesRef();
|
||||
private final PagedBytes.Reader bytes;
|
||||
private final MonotonicAppendingLongBuffer termOrdToBytesOffset;
|
||||
private final PackedLongValues termOrdToBytesOffset;
|
||||
|
||||
ValuesHolder(PagedBytes.Reader bytes, MonotonicAppendingLongBuffer termOrdToBytesOffset) {
|
||||
ValuesHolder(PagedBytes.Reader bytes, PackedLongValues termOrdToBytesOffset) {
|
||||
this.bytes = bytes;
|
||||
this.termOrdToBytesOffset = termOrdToBytesOffset;
|
||||
}
|
||||
|
|
|
@ -23,7 +23,8 @@ import org.apache.lucene.codecs.blocktree.Stats;
|
|||
import org.apache.lucene.index.*;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.PagedBytes;
|
||||
import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer;
|
||||
import org.apache.lucene.util.packed.PackedInts;
|
||||
import org.apache.lucene.util.packed.PackedLongValues;
|
||||
import org.elasticsearch.common.breaker.CircuitBreaker;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.index.Index;
|
||||
|
@ -71,7 +72,7 @@ public class PagedBytesIndexFieldData extends AbstractIndexOrdinalsFieldData {
|
|||
|
||||
final PagedBytes bytes = new PagedBytes(15);
|
||||
|
||||
final MonotonicAppendingLongBuffer termOrdToBytesOffset = new MonotonicAppendingLongBuffer();
|
||||
final PackedLongValues.Builder termOrdToBytesOffset = PackedLongValues.monotonicBuilder(PackedInts.COMPACT);
|
||||
final long numTerms;
|
||||
if (regex == null && frequency == null) {
|
||||
numTerms = terms.size();
|
||||
|
@ -102,7 +103,7 @@ public class PagedBytesIndexFieldData extends AbstractIndexOrdinalsFieldData {
|
|||
PagedBytes.Reader bytesReader = bytes.freeze(true);
|
||||
final Ordinals ordinals = builder.build(fieldDataType.getSettings());
|
||||
|
||||
data = new PagedBytesAtomicFieldData(bytesReader, termOrdToBytesOffset, ordinals);
|
||||
data = new PagedBytesAtomicFieldData(bytesReader, termOrdToBytesOffset.build(), ordinals);
|
||||
success = true;
|
||||
return data;
|
||||
} finally {
|
||||
|
|
|
@ -23,12 +23,13 @@ import com.carrotsearch.hppc.ObjectObjectOpenHashMap;
|
|||
import com.carrotsearch.hppc.cursors.ObjectObjectCursor;
|
||||
import com.google.common.collect.ImmutableSortedSet;
|
||||
import org.apache.lucene.index.*;
|
||||
import org.apache.lucene.index.MultiDocValues.OrdinalMap;
|
||||
import org.apache.lucene.util.Accountable;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.LongValues;
|
||||
import org.apache.lucene.util.PagedBytes;
|
||||
import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer;
|
||||
import org.apache.lucene.util.packed.PackedInts;
|
||||
import org.apache.lucene.util.packed.PackedLongValues;
|
||||
import org.elasticsearch.ElasticsearchException;
|
||||
import org.elasticsearch.ElasticsearchIllegalStateException;
|
||||
import org.elasticsearch.common.Nullable;
|
||||
|
@ -136,7 +137,7 @@ public class ParentChildIndexFieldData extends AbstractIndexFieldData<AtomicPare
|
|||
|
||||
typeToAtomicFieldData.put(
|
||||
cursor.key,
|
||||
new PagedBytesAtomicFieldData(bytesReader, cursor.value.termOrdToBytesOffset, ordinals)
|
||||
new PagedBytesAtomicFieldData(bytesReader, cursor.value.termOrdToBytesOffset.build(), ordinals)
|
||||
);
|
||||
}
|
||||
data = new ParentChildAtomicFieldData(typeToAtomicFieldData.build());
|
||||
|
@ -183,12 +184,12 @@ public class ParentChildIndexFieldData extends AbstractIndexFieldData<AtomicPare
|
|||
class TypeBuilder {
|
||||
|
||||
final PagedBytes bytes;
|
||||
final MonotonicAppendingLongBuffer termOrdToBytesOffset;
|
||||
final PackedLongValues.Builder termOrdToBytesOffset;
|
||||
final OrdinalsBuilder builder;
|
||||
|
||||
TypeBuilder(float acceptableTransientOverheadRatio, AtomicReader reader) throws IOException {
|
||||
bytes = new PagedBytes(15);
|
||||
termOrdToBytesOffset = new MonotonicAppendingLongBuffer();
|
||||
termOrdToBytesOffset = PackedLongValues.monotonicBuilder(PackedInts.COMPACT);
|
||||
builder = new OrdinalsBuilder(-1, reader.maxDoc(), acceptableTransientOverheadRatio);
|
||||
}
|
||||
}
|
||||
|
@ -299,7 +300,7 @@ public class ParentChildIndexFieldData extends AbstractIndexFieldData<AtomicPare
|
|||
for (Map.Entry<String, SortedDocValues[]> entry : types.entrySet()) {
|
||||
final String parentType = entry.getKey();
|
||||
final SortedDocValues[] values = entry.getValue();
|
||||
final XOrdinalMap ordinalMap = XOrdinalMap.build(null, entry.getValue(), PackedInts.DEFAULT);
|
||||
final OrdinalMap ordinalMap = OrdinalMap.build(null, entry.getValue(), PackedInts.DEFAULT);
|
||||
ramBytesUsed += ordinalMap.ramBytesUsed();
|
||||
for (int i = 0; i < values.length; ++i) {
|
||||
final SortedDocValues segmentValues = values[i];
|
||||
|
|
|
@ -24,7 +24,7 @@ import com.google.common.base.Charsets;
|
|||
import com.google.common.base.Predicate;
|
||||
import com.google.common.collect.*;
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.SimpleAnalyzerWrapper;
|
||||
import org.apache.lucene.analysis.DelegatingAnalyzerWrapper;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.queries.FilterClause;
|
||||
import org.apache.lucene.queries.TermFilter;
|
||||
|
@ -1064,11 +1064,12 @@ public class MapperService extends AbstractIndexComponent {
}
}

final class SmartIndexNameSearchAnalyzer extends SimpleAnalyzerWrapper {
final class SmartIndexNameSearchAnalyzer extends DelegatingAnalyzerWrapper {

private final Analyzer defaultAnalyzer;

SmartIndexNameSearchAnalyzer(Analyzer defaultAnalyzer) {
super(Analyzer.PER_FIELD_REUSE_STRATEGY);
this.defaultAnalyzer = defaultAnalyzer;
}
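
Both search analyzers now extend Lucene 4.10's DelegatingAnalyzerWrapper (LUCENE-5803), which supersedes the project's SimpleAnalyzerWrapper shim and delegates component reuse to the wrapped analyzer instead of holding its own per-field thread locals. A minimal sketch of a wrapper in the new style; the map-based lookup is illustrative and not the MapperService logic.

import java.util.Map;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.DelegatingAnalyzerWrapper;

// Hypothetical per-field wrapper: reuse is delegated to the wrapped analyzers,
// so this class keeps no token-stream state of its own.
class PerFieldSearchAnalyzer extends DelegatingAnalyzerWrapper {
    private final Map<String, Analyzer> perField;
    private final Analyzer defaultAnalyzer;

    PerFieldSearchAnalyzer(Map<String, Analyzer> perField, Analyzer defaultAnalyzer) {
        super(Analyzer.PER_FIELD_REUSE_STRATEGY);
        this.perField = perField;
        this.defaultAnalyzer = defaultAnalyzer;
    }

    @Override
    protected Analyzer getWrappedAnalyzer(String fieldName) {
        Analyzer analyzer = perField.get(fieldName);
        return analyzer != null ? analyzer : defaultAnalyzer;
    }
}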
@ -1095,11 +1096,12 @@ public class MapperService extends AbstractIndexComponent {
|
|||
}
|
||||
}
|
||||
|
||||
final class SmartIndexNameSearchQuoteAnalyzer extends SimpleAnalyzerWrapper {
|
||||
final class SmartIndexNameSearchQuoteAnalyzer extends DelegatingAnalyzerWrapper {
|
||||
|
||||
private final Analyzer defaultAnalyzer;
|
||||
|
||||
SmartIndexNameSearchQuoteAnalyzer(Analyzer defaultAnalyzer) {
|
||||
super(Analyzer.PER_FIELD_REUSE_STRATEGY);
|
||||
this.defaultAnalyzer = defaultAnalyzer;
|
||||
}
|
||||
|
||||
|
|
|
@ -20,7 +20,7 @@
|
|||
package org.elasticsearch.index.mapper;
|
||||
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.UnicodeUtil;
|
||||
import org.apache.lucene.util.BytesRefBuilder;
|
||||
import org.elasticsearch.action.get.MultiGetRequest;
|
||||
import org.elasticsearch.common.lucene.BytesRefs;
|
||||
|
||||
|
@ -84,10 +84,10 @@ public final class Uid {
|
|||
}
|
||||
|
||||
public static BytesRef typePrefixAsBytes(BytesRef type) {
|
||||
BytesRef bytesRef = new BytesRef(type.length + 1);
|
||||
BytesRefBuilder bytesRef = new BytesRefBuilder();
|
||||
bytesRef.append(type);
|
||||
bytesRef.append(DELIMITER_BYTES);
|
||||
return bytesRef;
|
||||
return bytesRef.toBytesRef();
|
||||
}
|
||||
|
||||
public static Uid createUid(String uid) {
|
||||
|
@ -127,10 +127,11 @@ public final class Uid {
|
|||
return ref;
|
||||
}
|
||||
|
||||
public static void createUidAsBytes(BytesRef type, BytesRef id, BytesRef spare) {
|
||||
public static BytesRef createUidAsBytes(BytesRef type, BytesRef id, BytesRefBuilder spare) {
|
||||
spare.copyBytes(type);
|
||||
spare.append(DELIMITER_BYTES);
|
||||
spare.append(id);
|
||||
return spare.get();
|
||||
}
|
||||
|
||||
public static BytesRef[] createTypeUids(Collection<String> types, Object ids) {
|
||||
|
@ -140,13 +141,13 @@ public final class Uid {
|
|||
public static BytesRef[] createTypeUids(Collection<String> types, List<? extends Object> ids) {
|
||||
final int numIds = ids.size();
|
||||
BytesRef[] uids = new BytesRef[types.size() * ids.size()];
|
||||
BytesRef typeBytes = new BytesRef();
|
||||
BytesRef idBytes = new BytesRef();
|
||||
BytesRefBuilder typeBytes = new BytesRefBuilder();
|
||||
BytesRefBuilder idBytes = new BytesRefBuilder();
|
||||
int index = 0;
|
||||
for (String type : types) {
|
||||
UnicodeUtil.UTF16toUTF8(type, 0, type.length(), typeBytes);
|
||||
typeBytes.copyChars(type);
|
||||
for (int i = 0; i < numIds; i++, index++) {
|
||||
uids[index] = Uid.createUidAsBytes(typeBytes, BytesRefs.toBytesRef(ids.get(i), idBytes));
|
||||
uids[index] = Uid.createUidAsBytes(typeBytes.get(), BytesRefs.toBytesRef(ids.get(i), idBytes));
|
||||
}
|
||||
}
|
||||
return uids;
|
||||
|
|
|
@ -27,6 +27,7 @@ import org.apache.lucene.search.NumericRangeFilter;
|
|||
import org.apache.lucene.search.NumericRangeQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BytesRefBuilder;
|
||||
import org.apache.lucene.util.NumericUtils;
|
||||
import org.elasticsearch.ElasticsearchIllegalArgumentException;
|
||||
import org.elasticsearch.common.Explicit;
|
||||
|
@ -162,9 +163,9 @@ public class ByteFieldMapper extends NumberFieldMapper<Byte> {
|
|||
|
||||
@Override
|
||||
public BytesRef indexedValueForSearch(Object value) {
|
||||
BytesRef bytesRef = new BytesRef();
|
||||
BytesRefBuilder bytesRef = new BytesRefBuilder();
|
||||
NumericUtils.intToPrefixCoded(parseValue(value), 0, bytesRef); // 0 because of exact match
|
||||
return bytesRef;
|
||||
return bytesRef.get();
|
||||
}
|
||||
|
||||
private byte parseValue(Object value) {
|
||||
|
|
|
@ -26,6 +26,7 @@ import org.apache.lucene.search.NumericRangeFilter;
|
|||
import org.apache.lucene.search.NumericRangeQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BytesRefBuilder;
|
||||
import org.apache.lucene.util.NumericUtils;
|
||||
import org.elasticsearch.ElasticsearchIllegalArgumentException;
|
||||
import org.elasticsearch.common.Explicit;
|
||||
|
@ -249,9 +250,9 @@ public class DateFieldMapper extends NumberFieldMapper<Long> {
|
|||
|
||||
@Override
|
||||
public BytesRef indexedValueForSearch(Object value) {
|
||||
BytesRef bytesRef = new BytesRef();
|
||||
BytesRefBuilder bytesRef = new BytesRefBuilder();
|
||||
NumericUtils.longToPrefixCoded(parseValue(value), 0, bytesRef); // 0 because of exact match
|
||||
return bytesRef;
|
||||
return bytesRef.get();
|
||||
}
|
||||
|
||||
private long parseValue(Object value) {
|
||||
|
|
|
@ -30,6 +30,7 @@ import org.apache.lucene.search.NumericRangeFilter;
|
|||
import org.apache.lucene.search.NumericRangeQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BytesRefBuilder;
|
||||
import org.apache.lucene.util.NumericUtils;
|
||||
import org.elasticsearch.ElasticsearchIllegalArgumentException;
|
||||
import org.elasticsearch.common.Explicit;
|
||||
|
@ -167,9 +168,9 @@ public class DoubleFieldMapper extends NumberFieldMapper<Double> {
|
|||
@Override
|
||||
public BytesRef indexedValueForSearch(Object value) {
|
||||
long longValue = NumericUtils.doubleToSortableLong(parseDoubleValue(value));
|
||||
BytesRef bytesRef = new BytesRef();
|
||||
BytesRefBuilder bytesRef = new BytesRefBuilder();
|
||||
NumericUtils.longToPrefixCoded(longValue, 0, bytesRef); // 0 because of exact match
|
||||
return bytesRef;
|
||||
return bytesRef.get();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -30,6 +30,7 @@ import org.apache.lucene.search.NumericRangeFilter;
|
|||
import org.apache.lucene.search.NumericRangeQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BytesRefBuilder;
|
||||
import org.apache.lucene.util.NumericUtils;
|
||||
import org.elasticsearch.ElasticsearchIllegalArgumentException;
|
||||
import org.elasticsearch.common.Explicit;
|
||||
|
@ -166,9 +167,9 @@ public class FloatFieldMapper extends NumberFieldMapper<Float> {
|
|||
@Override
|
||||
public BytesRef indexedValueForSearch(Object value) {
|
||||
int intValue = NumericUtils.floatToSortableInt(parseValue(value));
|
||||
BytesRef bytesRef = new BytesRef();
|
||||
BytesRefBuilder bytesRef = new BytesRefBuilder();
|
||||
NumericUtils.intToPrefixCoded(intValue, 0, bytesRef); // 0 because of exact match
|
||||
return bytesRef;
|
||||
return bytesRef.get();
|
||||
}
|
||||
|
||||
private float parseValue(Object value) {
|
||||
|
|
|
@ -28,6 +28,7 @@ import org.apache.lucene.search.NumericRangeFilter;
|
|||
import org.apache.lucene.search.NumericRangeQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BytesRefBuilder;
|
||||
import org.apache.lucene.util.NumericUtils;
|
||||
import org.elasticsearch.ElasticsearchIllegalArgumentException;
|
||||
import org.elasticsearch.common.Explicit;
|
||||
|
@ -161,9 +162,9 @@ public class IntegerFieldMapper extends NumberFieldMapper<Integer> {
|
|||
|
||||
@Override
|
||||
public BytesRef indexedValueForSearch(Object value) {
|
||||
BytesRef bytesRef = new BytesRef();
|
||||
BytesRefBuilder bytesRef = new BytesRefBuilder();
|
||||
NumericUtils.intToPrefixCoded(parseValue(value), 0, bytesRef); // 0 because of exact match
|
||||
return bytesRef;
|
||||
return bytesRef.get();
|
||||
}
|
||||
|
||||
private int parseValue(Object value) {
|
||||
|
|
|
@ -28,6 +28,7 @@ import org.apache.lucene.search.NumericRangeFilter;
|
|||
import org.apache.lucene.search.NumericRangeQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BytesRefBuilder;
|
||||
import org.apache.lucene.util.NumericUtils;
|
||||
import org.elasticsearch.ElasticsearchIllegalArgumentException;
|
||||
import org.elasticsearch.common.Explicit;
|
||||
|
@ -161,9 +162,9 @@ public class LongFieldMapper extends NumberFieldMapper<Long> {
|
|||
|
||||
@Override
|
||||
public BytesRef indexedValueForSearch(Object value) {
|
||||
BytesRef bytesRef = new BytesRef();
|
||||
BytesRefBuilder bytesRef = new BytesRefBuilder();
|
||||
NumericUtils.longToPrefixCoded(parseLongValue(value), 0, bytesRef); // 0 because of exact match
|
||||
return bytesRef;
|
||||
return bytesRef.get();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -28,6 +28,7 @@ import org.apache.lucene.search.NumericRangeFilter;
|
|||
import org.apache.lucene.search.NumericRangeQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BytesRefBuilder;
|
||||
import org.apache.lucene.util.NumericUtils;
|
||||
import org.elasticsearch.ElasticsearchIllegalArgumentException;
|
||||
import org.elasticsearch.common.Explicit;
|
||||
|
@ -163,9 +164,9 @@ public class ShortFieldMapper extends NumberFieldMapper<Short> {
|
|||
|
||||
@Override
|
||||
public BytesRef indexedValueForSearch(Object value) {
|
||||
BytesRef bytesRef = new BytesRef();
|
||||
BytesRefBuilder bytesRef = new BytesRefBuilder();
|
||||
NumericUtils.intToPrefixCoded(parseValue(value), 0, bytesRef); // 0 because of exact match
|
||||
return bytesRef;
|
||||
return bytesRef.get();
|
||||
}
|
||||
|
||||
private short parseValue(Object value) {
|
||||
|
|
|
@ -26,6 +26,7 @@ import org.apache.lucene.search.NumericRangeFilter;
|
|||
import org.apache.lucene.search.NumericRangeQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BytesRefBuilder;
|
||||
import org.apache.lucene.util.NumericUtils;
|
||||
import org.elasticsearch.common.Nullable;
|
||||
import org.elasticsearch.common.Numbers;
|
||||
|
@ -167,9 +168,9 @@ public class BoostFieldMapper extends NumberFieldMapper<Float> implements Intern
|
|||
@Override
|
||||
public BytesRef indexedValueForSearch(Object value) {
|
||||
int intValue = NumericUtils.floatToSortableInt(parseValue(value));
|
||||
BytesRef bytesRef = new BytesRef();
|
||||
BytesRefBuilder bytesRef = new BytesRefBuilder();
|
||||
NumericUtils.intToPrefixCoded(intValue, precisionStep(), bytesRef);
|
||||
return bytesRef;
|
||||
return bytesRef.get();
|
||||
}
|
||||
|
||||
private float parseValue(Object value) {
|
||||
|
|
|
@ -28,6 +28,7 @@ import org.apache.lucene.search.NumericRangeFilter;
|
|||
import org.apache.lucene.search.NumericRangeQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BytesRefBuilder;
|
||||
import org.apache.lucene.util.NumericUtils;
|
||||
import org.elasticsearch.ElasticsearchIllegalArgumentException;
|
||||
import org.elasticsearch.common.Explicit;
|
||||
|
@ -206,9 +207,9 @@ public class IpFieldMapper extends NumberFieldMapper<Long> {
|
|||
|
||||
@Override
|
||||
public BytesRef indexedValueForSearch(Object value) {
|
||||
BytesRef bytesRef = new BytesRef();
|
||||
BytesRefBuilder bytesRef = new BytesRefBuilder();
|
||||
NumericUtils.longToPrefixCoded(parseValue(value), 0, bytesRef); // 0 because of exact match
|
||||
return bytesRef;
|
||||
return bytesRef.get();
|
||||
}
|
||||
|
||||
private long parseValue(Object value) {
|
||||
|
|
|
@ -218,11 +218,6 @@ public final class ElasticsearchMergePolicy extends MergePolicy {
|
|||
return upgradedMergeSpecification(delegate.findForcedDeletesMerges(segmentInfos, writer));
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() {
|
||||
delegate.close();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean useCompoundFile(SegmentInfos segments, SegmentCommitInfo newSegment, IndexWriter writer) throws IOException {
|
||||
return delegate.useCompoundFile(segments, newSegment, writer);
|
||||
|
|
|
@ -38,29 +38,30 @@ import java.util.concurrent.CopyOnWriteArraySet;
|
|||
public class LogByteSizeMergePolicyProvider extends AbstractMergePolicyProvider<LogByteSizeMergePolicy> {
|
||||
|
||||
private final IndexSettingsService indexSettingsService;
|
||||
public static final String MAX_MERGE_BYTE_SIZE_KEY = "index.merge.policy.max_merge_sizes";
|
||||
public static final String MIN_MERGE_BYTE_SIZE_KEY = "index.merge.policy.min_merge_size";
|
||||
public static final String MERGE_FACTORY_KEY = "index.merge.policy.merge_factor";
|
||||
private volatile ByteSizeValue minMergeSize;
|
||||
private volatile ByteSizeValue maxMergeSize;
|
||||
private volatile int mergeFactor;
|
||||
private volatile int maxMergeDocs;
|
||||
private final boolean calibrateSizeByDeletes;
|
||||
|
||||
private final Set<CustomLogByteSizeMergePolicy> policies = new CopyOnWriteArraySet<>();
|
||||
|
||||
private final ApplySettings applySettings = new ApplySettings();
|
||||
private final LogByteSizeMergePolicy mergePolicy = new LogByteSizeMergePolicy();
|
||||
|
||||
private static final ByteSizeValue DEFAULT_MIN_MERGE_SIZE = new ByteSizeValue((long) (LogByteSizeMergePolicy.DEFAULT_MIN_MERGE_MB * 1024 * 1024), ByteSizeUnit.BYTES);
|
||||
private static final ByteSizeValue DEFAULT_MAX_MERGE_SIZE = new ByteSizeValue((long) LogByteSizeMergePolicy.DEFAULT_MAX_MERGE_MB, ByteSizeUnit.MB);
|
||||
|
||||
@Inject
|
||||
public LogByteSizeMergePolicyProvider(Store store, IndexSettingsService indexSettingsService) {
|
||||
super(store);
|
||||
Preconditions.checkNotNull(store, "Store must be provided to merge policy");
|
||||
this.indexSettingsService = indexSettingsService;
|
||||
this.minMergeSize = componentSettings.getAsBytesSize("min_merge_size", new ByteSizeValue((long) (LogByteSizeMergePolicy.DEFAULT_MIN_MERGE_MB * 1024 * 1024), ByteSizeUnit.BYTES));
|
||||
this.maxMergeSize = componentSettings.getAsBytesSize("max_merge_size", new ByteSizeValue((long) LogByteSizeMergePolicy.DEFAULT_MAX_MERGE_MB, ByteSizeUnit.MB));
|
||||
this.mergeFactor = componentSettings.getAsInt("merge_factor", LogByteSizeMergePolicy.DEFAULT_MERGE_FACTOR);
|
||||
this.maxMergeDocs = componentSettings.getAsInt("max_merge_docs", LogByteSizeMergePolicy.DEFAULT_MAX_MERGE_DOCS);
|
||||
this.calibrateSizeByDeletes = componentSettings.getAsBoolean("calibrate_size_by_deletes", true);
|
||||
|
||||
ByteSizeValue minMergeSize = componentSettings.getAsBytesSize("min_merge_size", DEFAULT_MIN_MERGE_SIZE);
|
||||
ByteSizeValue maxMergeSize = componentSettings.getAsBytesSize("max_merge_size", DEFAULT_MAX_MERGE_SIZE);
|
||||
int mergeFactor = componentSettings.getAsInt("merge_factor", LogByteSizeMergePolicy.DEFAULT_MERGE_FACTOR);
|
||||
int maxMergeDocs = componentSettings.getAsInt("max_merge_docs", LogByteSizeMergePolicy.DEFAULT_MAX_MERGE_DOCS);
|
||||
boolean calibrateSizeByDeletes = componentSettings.getAsBoolean("calibrate_size_by_deletes", true);
|
||||
|
||||
mergePolicy.setMinMergeMB(minMergeSize.mbFrac());
|
||||
mergePolicy.setMaxMergeMB(maxMergeSize.mbFrac());
|
||||
mergePolicy.setMergeFactor(mergeFactor);
|
||||
mergePolicy.setMaxMergeDocs(maxMergeDocs);
|
||||
mergePolicy.setCalibrateSizeByDeletes(calibrateSizeByDeletes);
|
||||
mergePolicy.setNoCFSRatio(noCFSRatio);
|
||||
logger.debug("using [log_bytes_size] merge policy with merge_factor[{}], min_merge_size[{}], max_merge_size[{}], max_merge_docs[{}], calibrate_size_by_deletes[{}]",
|
||||
mergeFactor, minMergeSize, maxMergeSize, maxMergeDocs, calibrateSizeByDeletes);
|
||||
|
||||
|
@ -68,16 +69,7 @@ public class LogByteSizeMergePolicyProvider extends AbstractMergePolicyProvider<
}

@Override
public LogByteSizeMergePolicy newMergePolicy() {
final CustomLogByteSizeMergePolicy mergePolicy = new CustomLogByteSizeMergePolicy(this);
mergePolicy.setMinMergeMB(minMergeSize.mbFrac());
mergePolicy.setMaxMergeMB(maxMergeSize.mbFrac());
mergePolicy.setMergeFactor(mergeFactor);
mergePolicy.setMaxMergeDocs(maxMergeDocs);
mergePolicy.setCalibrateSizeByDeletes(calibrateSizeByDeletes);
mergePolicy.setNoCFSRatio(noCFSRatio);

policies.add(mergePolicy);
public LogByteSizeMergePolicy getMergePolicy() {
return mergePolicy;
}
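
The provider now exposes one shared LogByteSizeMergePolicy through getMergePolicy() and mutates it in place when settings change, instead of building a CustomLogByteSizeMergePolicy per writer and tracking the live instances. A hedged sketch of that shape; the class and parameter names are mine and stand in for the actual Elasticsearch settings plumbing.

import org.apache.lucene.index.LogByteSizeMergePolicy;

// Illustrative holder: a single policy instance is handed to the IndexWriter
// and updated in place by a settings-refresh listener.
class SharedMergePolicyHolder {
    private final LogByteSizeMergePolicy mergePolicy = new LogByteSizeMergePolicy();

    LogByteSizeMergePolicy getMergePolicy() {
        return mergePolicy;
    }

    void onRefreshSettings(double minMergeMB, double maxMergeMB, int mergeFactor) {
        if (mergePolicy.getMinMergeMB() != minMergeMB) {
            mergePolicy.setMinMergeMB(minMergeMB);
        }
        if (mergePolicy.getMaxMergeMB() != maxMergeMB) {
            mergePolicy.setMaxMergeMB(maxMergeMB);
        }
        mergePolicy.setMergeFactor(mergeFactor);
    }
}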
@ -90,72 +82,52 @@ public class LogByteSizeMergePolicyProvider extends AbstractMergePolicyProvider<
|
|||
public static final String INDEX_MERGE_POLICY_MAX_MERGE_SIZE = "index.merge.policy.max_merge_size";
|
||||
public static final String INDEX_MERGE_POLICY_MAX_MERGE_DOCS = "index.merge.policy.max_merge_docs";
|
||||
public static final String INDEX_MERGE_POLICY_MERGE_FACTOR = "index.merge.policy.merge_factor";
|
||||
public static final String INDEX_MERGE_POLICY_CALIBRATE_SIZE_BY_DELETES = "index.merge.policy.calibrate_size_by_deletes";
|
||||
|
||||
class ApplySettings implements IndexSettingsService.Listener {
|
||||
@Override
|
||||
public void onRefreshSettings(Settings settings) {
|
||||
ByteSizeValue minMergeSize = settings.getAsBytesSize(INDEX_MERGE_POLICY_MIN_MERGE_SIZE, LogByteSizeMergePolicyProvider.this.minMergeSize);
|
||||
if (!minMergeSize.equals(LogByteSizeMergePolicyProvider.this.minMergeSize)) {
|
||||
logger.info("updating min_merge_size from [{}] to [{}]", LogByteSizeMergePolicyProvider.this.minMergeSize, minMergeSize);
|
||||
LogByteSizeMergePolicyProvider.this.minMergeSize = minMergeSize;
|
||||
for (CustomLogByteSizeMergePolicy policy : policies) {
|
||||
policy.setMinMergeMB(minMergeSize.mbFrac());
|
||||
}
|
||||
double oldMinMergeSizeMB = mergePolicy.getMinMergeMB();
|
||||
ByteSizeValue minMergeSize = settings.getAsBytesSize(INDEX_MERGE_POLICY_MIN_MERGE_SIZE, DEFAULT_MIN_MERGE_SIZE);
|
||||
if (minMergeSize.mbFrac() != oldMinMergeSizeMB) {
|
||||
logger.info("updating min_merge_size from [{}mb] to [{}]", oldMinMergeSizeMB, minMergeSize);
|
||||
mergePolicy.setMinMergeMB(minMergeSize.mbFrac());
|
||||
}
|
||||
|
||||
ByteSizeValue maxMergeSize = settings.getAsBytesSize(INDEX_MERGE_POLICY_MAX_MERGE_SIZE, LogByteSizeMergePolicyProvider.this.maxMergeSize);
|
||||
if (!maxMergeSize.equals(LogByteSizeMergePolicyProvider.this.maxMergeSize)) {
|
||||
logger.info("updating max_merge_size from [{}] to [{}]", LogByteSizeMergePolicyProvider.this.maxMergeSize, maxMergeSize);
|
||||
LogByteSizeMergePolicyProvider.this.maxMergeSize = maxMergeSize;
|
||||
for (CustomLogByteSizeMergePolicy policy : policies) {
|
||||
policy.setMaxMergeMB(maxMergeSize.mbFrac());
|
||||
}
|
||||
double oldMaxMergeSizeMB = mergePolicy.getMaxMergeMB();
|
||||
ByteSizeValue maxMergeSize = settings.getAsBytesSize(INDEX_MERGE_POLICY_MAX_MERGE_SIZE, DEFAULT_MAX_MERGE_SIZE);
|
||||
if (maxMergeSize.mbFrac() != oldMaxMergeSizeMB) {
|
||||
logger.info("updating max_merge_size from [{}mb] to [{}]", oldMaxMergeSizeMB, maxMergeSize);
|
||||
mergePolicy.setMaxMergeMB(maxMergeSize.mbFrac());
|
||||
}
|
||||
|
||||
int maxMergeDocs = settings.getAsInt(INDEX_MERGE_POLICY_MAX_MERGE_DOCS, LogByteSizeMergePolicyProvider.this.maxMergeDocs);
|
||||
if (maxMergeDocs != LogByteSizeMergePolicyProvider.this.maxMergeDocs) {
|
||||
logger.info("updating max_merge_docs from [{}] to [{}]", LogByteSizeMergePolicyProvider.this.maxMergeDocs, maxMergeDocs);
|
||||
LogByteSizeMergePolicyProvider.this.maxMergeDocs = maxMergeDocs;
|
||||
for (CustomLogByteSizeMergePolicy policy : policies) {
|
||||
policy.setMaxMergeDocs(maxMergeDocs);
|
||||
}
|
||||
int oldMaxMergeDocs = mergePolicy.getMaxMergeDocs();
|
||||
int maxMergeDocs = settings.getAsInt(INDEX_MERGE_POLICY_MAX_MERGE_DOCS, LogByteSizeMergePolicy.DEFAULT_MAX_MERGE_DOCS);
|
||||
if (maxMergeDocs != oldMaxMergeDocs) {
|
||||
logger.info("updating max_merge_docs from [{}] to [{}]", oldMaxMergeDocs, maxMergeDocs);
|
||||
mergePolicy.setMaxMergeDocs(maxMergeDocs);
|
||||
}
|
||||
|
||||
int mergeFactor = settings.getAsInt(INDEX_MERGE_POLICY_MERGE_FACTOR, LogByteSizeMergePolicyProvider.this.mergeFactor);
|
||||
if (mergeFactor != LogByteSizeMergePolicyProvider.this.mergeFactor) {
|
||||
logger.info("updating merge_factor from [{}] to [{}]", LogByteSizeMergePolicyProvider.this.mergeFactor, mergeFactor);
|
||||
LogByteSizeMergePolicyProvider.this.mergeFactor = mergeFactor;
|
||||
for (CustomLogByteSizeMergePolicy policy : policies) {
|
||||
policy.setMergeFactor(mergeFactor);
|
||||
}
|
||||
int oldMergeFactor = mergePolicy.getMergeFactor();
|
||||
int mergeFactor = settings.getAsInt(INDEX_MERGE_POLICY_MERGE_FACTOR, LogByteSizeMergePolicy.DEFAULT_MERGE_FACTOR);
|
||||
if (mergeFactor != oldMergeFactor) {
|
||||
logger.info("updating merge_factor from [{}] to [{}]", oldMergeFactor, mergeFactor);
|
||||
mergePolicy.setMergeFactor(mergeFactor);
|
||||
}
|
||||
|
||||
boolean oldCalibrateSizeByDeletes = mergePolicy.getCalibrateSizeByDeletes();
|
||||
boolean calibrateSizeByDeletes = settings.getAsBoolean(INDEX_MERGE_POLICY_CALIBRATE_SIZE_BY_DELETES, true);
|
||||
if (calibrateSizeByDeletes != oldCalibrateSizeByDeletes) {
|
||||
logger.info("updating calibrate_size_by_deletes from [{}] to [{}]", oldCalibrateSizeByDeletes, calibrateSizeByDeletes);
|
||||
mergePolicy.setCalibrateSizeByDeletes(calibrateSizeByDeletes);
|
||||
}
|
||||
|
||||
final double noCFSRatio = parseNoCFSRatio(settings.get(INDEX_COMPOUND_FORMAT, Double.toString(LogByteSizeMergePolicyProvider.this.noCFSRatio)));
|
||||
if (noCFSRatio != LogByteSizeMergePolicyProvider.this.noCFSRatio) {
|
||||
logger.info("updating index.compound_format from [{}] to [{}]", formatNoCFSRatio(LogByteSizeMergePolicyProvider.this.noCFSRatio), formatNoCFSRatio(noCFSRatio));
|
||||
LogByteSizeMergePolicyProvider.this.noCFSRatio = noCFSRatio;
|
||||
for (CustomLogByteSizeMergePolicy policy : policies) {
|
||||
policy.setNoCFSRatio(noCFSRatio);
|
||||
}
|
||||
mergePolicy.setNoCFSRatio(noCFSRatio);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
public static class CustomLogByteSizeMergePolicy extends LogByteSizeMergePolicy {
|
||||
|
||||
private final LogByteSizeMergePolicyProvider provider;
|
||||
|
||||
public CustomLogByteSizeMergePolicy(LogByteSizeMergePolicyProvider provider) {
|
||||
super();
|
||||
this.provider = provider;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() {
|
||||
super.close();
|
||||
provider.policies.remove(this);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -36,27 +36,30 @@ import java.util.concurrent.CopyOnWriteArraySet;
|
|||
public class LogDocMergePolicyProvider extends AbstractMergePolicyProvider<LogDocMergePolicy> {
|
||||
|
||||
private final IndexSettingsService indexSettingsService;
|
||||
private final ApplySettings applySettings = new ApplySettings();
|
||||
private final LogDocMergePolicy mergePolicy = new LogDocMergePolicy();
|
||||
|
||||
|
||||
public static final String MAX_MERGE_DOCS_KEY = "index.merge.policy.max_merge_docs";
|
||||
public static final String MIN_MERGE_DOCS_KEY = "index.merge.policy.min_merge_docs";
|
||||
public static final String MERGE_FACTORY_KEY = "index.merge.policy.merge_factor";
|
||||
private volatile int minMergeDocs;
|
||||
private volatile int maxMergeDocs;
|
||||
private volatile int mergeFactor;
|
||||
private final boolean calibrateSizeByDeletes;
|
||||
|
||||
private final Set<CustomLogDocMergePolicy> policies = new CopyOnWriteArraySet<>();
|
||||
|
||||
private final ApplySettings applySettings = new ApplySettings();
|
||||
|
||||
@Inject
|
||||
public LogDocMergePolicyProvider(Store store, IndexSettingsService indexSettingsService) {
|
||||
super(store);
|
||||
Preconditions.checkNotNull(store, "Store must be provided to merge policy");
|
||||
this.indexSettingsService = indexSettingsService;
|
||||
this.minMergeDocs = componentSettings.getAsInt("min_merge_docs", LogDocMergePolicy.DEFAULT_MIN_MERGE_DOCS);
|
||||
this.maxMergeDocs = componentSettings.getAsInt("max_merge_docs", LogDocMergePolicy.DEFAULT_MAX_MERGE_DOCS);
|
||||
this.mergeFactor = componentSettings.getAsInt("merge_factor", LogDocMergePolicy.DEFAULT_MERGE_FACTOR);
|
||||
this.calibrateSizeByDeletes = componentSettings.getAsBoolean("calibrate_size_by_deletes", true);
|
||||
|
||||
int minMergeDocs = componentSettings.getAsInt("min_merge_docs", LogDocMergePolicy.DEFAULT_MIN_MERGE_DOCS);
|
||||
int maxMergeDocs = componentSettings.getAsInt("max_merge_docs", LogDocMergePolicy.DEFAULT_MAX_MERGE_DOCS);
|
||||
int mergeFactor = componentSettings.getAsInt("merge_factor", LogDocMergePolicy.DEFAULT_MERGE_FACTOR);
|
||||
boolean calibrateSizeByDeletes = componentSettings.getAsBoolean("calibrate_size_by_deletes", true);
|
||||
|
||||
mergePolicy.setMinMergeDocs(minMergeDocs);
|
||||
mergePolicy.setMaxMergeDocs(maxMergeDocs);
|
||||
mergePolicy.setMergeFactor(mergeFactor);
|
||||
mergePolicy.setCalibrateSizeByDeletes(calibrateSizeByDeletes);
|
||||
mergePolicy.setNoCFSRatio(noCFSRatio);
|
||||
logger.debug("using [log_doc] merge policy with merge_factor[{}], min_merge_docs[{}], max_merge_docs[{}], calibrate_size_by_deletes[{}]",
|
||||
mergeFactor, minMergeDocs, maxMergeDocs, calibrateSizeByDeletes);
|
||||
|
||||
|
@ -69,76 +72,52 @@ public class LogDocMergePolicyProvider extends AbstractMergePolicyProvider<LogDo
|
|||
}
|
||||
|
||||
@Override
|
||||
public LogDocMergePolicy newMergePolicy() {
|
||||
final CustomLogDocMergePolicy mergePolicy = new CustomLogDocMergePolicy(this);
|
||||
mergePolicy.setMinMergeDocs(minMergeDocs);
|
||||
mergePolicy.setMaxMergeDocs(maxMergeDocs);
|
||||
mergePolicy.setMergeFactor(mergeFactor);
|
||||
mergePolicy.setCalibrateSizeByDeletes(calibrateSizeByDeletes);
|
||||
mergePolicy.setNoCFSRatio(noCFSRatio);
|
||||
policies.add(mergePolicy);
|
||||
public LogDocMergePolicy getMergePolicy() {
|
||||
return mergePolicy;
|
||||
}
|
||||
|
||||
public static final String INDEX_MERGE_POLICY_MIN_MERGE_DOCS = "index.merge.policy.min_merge_docs";
|
||||
public static final String INDEX_MERGE_POLICY_MAX_MERGE_DOCS = "index.merge.policy.max_merge_docs";
|
||||
public static final String INDEX_MERGE_POLICY_MERGE_FACTOR = "index.merge.policy.merge_factor";
|
||||
public static final String INDEX_MERGE_POLICY_CALIBRATE_SIZE_BY_DELETES = "index.merge.policy.calibrate_size_by_deletes";
|
||||
|
||||
class ApplySettings implements IndexSettingsService.Listener {
|
||||
@Override
|
||||
public void onRefreshSettings(Settings settings) {
|
||||
int minMergeDocs = settings.getAsInt(INDEX_MERGE_POLICY_MIN_MERGE_DOCS, LogDocMergePolicyProvider.this.minMergeDocs);
|
||||
if (minMergeDocs != LogDocMergePolicyProvider.this.minMergeDocs) {
|
||||
logger.info("updating min_merge_docs from [{}] to [{}]", LogDocMergePolicyProvider.this.minMergeDocs, minMergeDocs);
|
||||
LogDocMergePolicyProvider.this.minMergeDocs = minMergeDocs;
|
||||
for (CustomLogDocMergePolicy policy : policies) {
|
||||
policy.setMinMergeDocs(minMergeDocs);
|
||||
}
|
||||
int oldMinMergeDocs = mergePolicy.getMinMergeDocs();
|
||||
int minMergeDocs = settings.getAsInt(INDEX_MERGE_POLICY_MIN_MERGE_DOCS, LogDocMergePolicy.DEFAULT_MIN_MERGE_DOCS);
|
||||
if (minMergeDocs != oldMinMergeDocs) {
|
||||
logger.info("updating min_merge_docs from [{}] to [{}]", oldMinMergeDocs, minMergeDocs);
|
||||
mergePolicy.setMinMergeDocs(minMergeDocs);
|
||||
}
|
||||
|
||||
int maxMergeDocs = settings.getAsInt(INDEX_MERGE_POLICY_MAX_MERGE_DOCS, LogDocMergePolicyProvider.this.maxMergeDocs);
|
||||
if (maxMergeDocs != LogDocMergePolicyProvider.this.maxMergeDocs) {
|
||||
logger.info("updating max_merge_docs from [{}] to [{}]", LogDocMergePolicyProvider.this.maxMergeDocs, maxMergeDocs);
|
||||
LogDocMergePolicyProvider.this.maxMergeDocs = maxMergeDocs;
|
||||
for (CustomLogDocMergePolicy policy : policies) {
|
||||
policy.setMaxMergeDocs(maxMergeDocs);
|
||||
}
|
||||
int oldMaxMergeDocs = mergePolicy.getMaxMergeDocs();
|
||||
int maxMergeDocs = settings.getAsInt(INDEX_MERGE_POLICY_MAX_MERGE_DOCS, LogDocMergePolicy.DEFAULT_MAX_MERGE_DOCS);
|
||||
if (maxMergeDocs != oldMaxMergeDocs) {
|
||||
logger.info("updating max_merge_docs from [{}] to [{}]", oldMaxMergeDocs, maxMergeDocs);
|
||||
mergePolicy.setMaxMergeDocs(maxMergeDocs);
|
||||
}
|
||||
|
||||
int mergeFactor = settings.getAsInt(INDEX_MERGE_POLICY_MERGE_FACTOR, LogDocMergePolicyProvider.this.mergeFactor);
|
||||
if (mergeFactor != LogDocMergePolicyProvider.this.mergeFactor) {
|
||||
logger.info("updating merge_factor from [{}] to [{}]", LogDocMergePolicyProvider.this.mergeFactor, mergeFactor);
|
||||
LogDocMergePolicyProvider.this.mergeFactor = mergeFactor;
|
||||
for (CustomLogDocMergePolicy policy : policies) {
|
||||
policy.setMergeFactor(mergeFactor);
|
||||
}
|
||||
int oldMergeFactor = mergePolicy.getMergeFactor();
|
||||
int mergeFactor = settings.getAsInt(INDEX_MERGE_POLICY_MERGE_FACTOR, LogDocMergePolicy.DEFAULT_MERGE_FACTOR);
|
||||
if (mergeFactor != oldMergeFactor) {
|
||||
logger.info("updating merge_factor from [{}] to [{}]", oldMergeFactor, mergeFactor);
|
||||
mergePolicy.setMergeFactor(mergeFactor);
|
||||
}
|
||||
|
||||
final double noCFSRatio = parseNoCFSRatio(settings.get(INDEX_COMPOUND_FORMAT, Double.toString(LogDocMergePolicyProvider.this.noCFSRatio)));
|
||||
final boolean compoundFormat = noCFSRatio != 0.0;
|
||||
boolean oldCalibrateSizeByDeletes = mergePolicy.getCalibrateSizeByDeletes();
|
||||
boolean calibrateSizeByDeletes = settings.getAsBoolean(INDEX_MERGE_POLICY_CALIBRATE_SIZE_BY_DELETES, true);
|
||||
if (calibrateSizeByDeletes != oldCalibrateSizeByDeletes) {
|
||||
logger.info("updating calibrate_size_by_deletes from [{}] to [{}]", oldCalibrateSizeByDeletes, calibrateSizeByDeletes);
|
||||
mergePolicy.setCalibrateSizeByDeletes(calibrateSizeByDeletes);
|
||||
}
|
||||
|
||||
double noCFSRatio = parseNoCFSRatio(settings.get(INDEX_COMPOUND_FORMAT, Double.toString(LogDocMergePolicyProvider.this.noCFSRatio)));
|
||||
if (noCFSRatio != LogDocMergePolicyProvider.this.noCFSRatio) {
|
||||
logger.info("updating index.compound_format from [{}] to [{}]", formatNoCFSRatio(LogDocMergePolicyProvider.this.noCFSRatio), formatNoCFSRatio(noCFSRatio));
|
||||
LogDocMergePolicyProvider.this.noCFSRatio = noCFSRatio;
|
||||
for (CustomLogDocMergePolicy policy : policies) {
|
||||
policy.setNoCFSRatio(noCFSRatio);
|
||||
}
|
||||
mergePolicy.setNoCFSRatio(noCFSRatio);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public static class CustomLogDocMergePolicy extends LogDocMergePolicy {
|
||||
|
||||
private final LogDocMergePolicyProvider provider;
|
||||
|
||||
public CustomLogDocMergePolicy(LogDocMergePolicyProvider provider) {
|
||||
super();
|
||||
this.provider = provider;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() {
|
||||
super.close();
|
||||
provider.policies.remove(this);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -28,5 +28,5 @@ import org.elasticsearch.index.shard.IndexShardComponent;
 */
public interface MergePolicyProvider<T extends MergePolicy> extends IndexShardComponent, CloseableIndexComponent {

    T newMergePolicy();
    T getMergePolicy();
}

@ -28,65 +28,35 @@ import org.elasticsearch.common.unit.ByteSizeValue;
|
|||
import org.elasticsearch.index.settings.IndexSettingsService;
|
||||
import org.elasticsearch.index.store.Store;
|
||||
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.CopyOnWriteArraySet;
|
||||
|
||||
public class TieredMergePolicyProvider extends AbstractMergePolicyProvider<TieredMergePolicy> {
|
||||
|
||||
private final IndexSettingsService indexSettingsService;
|
||||
|
||||
private final Set<CustomTieredMergePolicyProvider> policies = new CopyOnWriteArraySet<>();
|
||||
|
||||
private volatile double forceMergeDeletesPctAllowed;
|
||||
private volatile ByteSizeValue floorSegment;
|
||||
private volatile int maxMergeAtOnce;
|
||||
private volatile int maxMergeAtOnceExplicit;
|
||||
private volatile ByteSizeValue maxMergedSegment;
|
||||
private volatile double segmentsPerTier;
|
||||
private volatile double reclaimDeletesWeight;
|
||||
|
||||
private final ApplySettings applySettings = new ApplySettings();
|
||||
private final TieredMergePolicy mergePolicy = new TieredMergePolicy();
|
||||
|
||||
|
||||
public static final double DEFAULT_EXPUNGE_DELETES_ALLOWED = 10d;
|
||||
public static final ByteSizeValue DEFAULT_FLOOR_SEGMENT = new ByteSizeValue(2, ByteSizeUnit.MB);
|
||||
public static final int DEFAULT_MAX_MERGE_AT_ONCE = 10;
|
||||
public static final int DEFAULT_MAX_MERGE_AT_ONCE_EXPLICIT = 30;
|
||||
public static final ByteSizeValue DEFAULT_MAX_MERGED_SEGMENT = new ByteSizeValue(5, ByteSizeUnit.GB);
|
||||
public static final double DEFAULT_SEGMENTS_PER_TIER = 10.0d;
|
||||
public static final double DEFAULT_RECLAIM_DELETES_WEIGHT = 2.0d;
|
||||
|
||||
@Inject
|
||||
public TieredMergePolicyProvider(Store store, IndexSettingsService indexSettingsService) {
|
||||
super(store);
|
||||
this.indexSettingsService = indexSettingsService;
|
||||
this.forceMergeDeletesPctAllowed = componentSettings.getAsDouble("expunge_deletes_allowed", 10d); // percentage
|
||||
this.floorSegment = componentSettings.getAsBytesSize("floor_segment", new ByteSizeValue(2, ByteSizeUnit.MB));
|
||||
this.maxMergeAtOnce = componentSettings.getAsInt("max_merge_at_once", 10);
|
||||
this.maxMergeAtOnceExplicit = componentSettings.getAsInt("max_merge_at_once_explicit", 30);
|
||||
|
||||
double forceMergeDeletesPctAllowed = componentSettings.getAsDouble("expunge_deletes_allowed", DEFAULT_EXPUNGE_DELETES_ALLOWED); // percentage
|
||||
ByteSizeValue floorSegment = componentSettings.getAsBytesSize("floor_segment", DEFAULT_FLOOR_SEGMENT);
|
||||
int maxMergeAtOnce = componentSettings.getAsInt("max_merge_at_once", DEFAULT_MAX_MERGE_AT_ONCE);
|
||||
int maxMergeAtOnceExplicit = componentSettings.getAsInt("max_merge_at_once_explicit", DEFAULT_MAX_MERGE_AT_ONCE_EXPLICIT);
|
||||
// TODO is this really a good default number for max_merge_segment, what happens for large indices, won't they end up with many segments?
|
||||
this.maxMergedSegment = componentSettings.getAsBytesSize("max_merged_segment", componentSettings.getAsBytesSize("max_merge_segment", new ByteSizeValue(5, ByteSizeUnit.GB)));
|
||||
this.segmentsPerTier = componentSettings.getAsDouble("segments_per_tier", 10.0d);
|
||||
this.reclaimDeletesWeight = componentSettings.getAsDouble("reclaim_deletes_weight", 2.0d);
|
||||
ByteSizeValue maxMergedSegment = componentSettings.getAsBytesSize("max_merged_segment", DEFAULT_MAX_MERGED_SEGMENT);
|
||||
double segmentsPerTier = componentSettings.getAsDouble("segments_per_tier", DEFAULT_SEGMENTS_PER_TIER);
|
||||
double reclaimDeletesWeight = componentSettings.getAsDouble("reclaim_deletes_weight", DEFAULT_RECLAIM_DELETES_WEIGHT);
|
||||
|
||||
fixSettingsIfNeeded();
|
||||
|
||||
logger.debug("using [tiered] merge policy with expunge_deletes_allowed[{}], floor_segment[{}], max_merge_at_once[{}], max_merge_at_once_explicit[{}], max_merged_segment[{}], segments_per_tier[{}], reclaim_deletes_weight[{}]",
|
||||
forceMergeDeletesPctAllowed, floorSegment, maxMergeAtOnce, maxMergeAtOnceExplicit, maxMergedSegment, segmentsPerTier, reclaimDeletesWeight);
|
||||
|
||||
indexSettingsService.addListener(applySettings);
|
||||
}
|
||||
|
||||
private void fixSettingsIfNeeded() {
|
||||
// fixing maxMergeAtOnce, see TieredMergePolicy#setMaxMergeAtOnce
|
||||
if (!(segmentsPerTier >= maxMergeAtOnce)) {
|
||||
int newMaxMergeAtOnce = (int) segmentsPerTier;
|
||||
// max merge at once should be at least 2
|
||||
if (newMaxMergeAtOnce <= 1) {
|
||||
newMaxMergeAtOnce = 2;
|
||||
}
|
||||
logger.debug("[tiered] merge policy changing max_merge_at_once from [{}] to [{}] because segments_per_tier [{}] has to be higher or equal to it", maxMergeAtOnce, newMaxMergeAtOnce, segmentsPerTier);
|
||||
this.maxMergeAtOnce = newMaxMergeAtOnce;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public TieredMergePolicy newMergePolicy() {
|
||||
final CustomTieredMergePolicyProvider mergePolicy = new CustomTieredMergePolicyProvider(this);
|
||||
maxMergeAtOnce = adjustMaxMergeAtOnceIfNeeded(maxMergeAtOnce, segmentsPerTier);
|
||||
mergePolicy.setNoCFSRatio(noCFSRatio);
|
||||
mergePolicy.setForceMergeDeletesPctAllowed(forceMergeDeletesPctAllowed);
|
||||
mergePolicy.setFloorSegmentMB(floorSegment.mbFrac());
|
||||
|
@ -95,6 +65,28 @@ public class TieredMergePolicyProvider extends AbstractMergePolicyProvider<Tiere
|
|||
mergePolicy.setMaxMergedSegmentMB(maxMergedSegment.mbFrac());
|
||||
mergePolicy.setSegmentsPerTier(segmentsPerTier);
|
||||
mergePolicy.setReclaimDeletesWeight(reclaimDeletesWeight);
|
||||
logger.debug("using [tiered] merge mergePolicy with expunge_deletes_allowed[{}], floor_segment[{}], max_merge_at_once[{}], max_merge_at_once_explicit[{}], max_merged_segment[{}], segments_per_tier[{}], reclaim_deletes_weight[{}]",
|
||||
forceMergeDeletesPctAllowed, floorSegment, maxMergeAtOnce, maxMergeAtOnceExplicit, maxMergedSegment, segmentsPerTier, reclaimDeletesWeight);
|
||||
|
||||
indexSettingsService.addListener(applySettings);
|
||||
}
|
||||
|
||||
private int adjustMaxMergeAtOnceIfNeeded(int maxMergeAtOnce, double segmentsPerTier) {
|
||||
// fixing maxMergeAtOnce, see TieredMergePolicy#setMaxMergeAtOnce
|
||||
if (!(segmentsPerTier >= maxMergeAtOnce)) {
|
||||
int newMaxMergeAtOnce = (int) segmentsPerTier;
|
||||
// max merge at once should be at least 2
|
||||
if (newMaxMergeAtOnce <= 1) {
|
||||
newMaxMergeAtOnce = 2;
|
||||
}
|
||||
logger.debug("[tiered] merge mergePolicy changing max_merge_at_once from [{}] to [{}] because segments_per_tier [{}] has to be higher or equal to it", maxMergeAtOnce, newMaxMergeAtOnce, segmentsPerTier);
|
||||
maxMergeAtOnce = newMaxMergeAtOnce;
|
||||
}
|
||||
return maxMergeAtOnce;
|
||||
}
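The helper above exists because TieredMergePolicy expects segments_per_tier to be at least max_merge_at_once, so the provider clamps the latter before applying it. A minimal sketch of the same clamping against a plain Lucene TieredMergePolicy (standalone, not the provider code):

// Sketch of the invariant enforced here: keep segmentsPerTier >= maxMergeAtOnce,
// clamping max_merge_at_once down (but never below 2) before applying both.
import org.apache.lucene.index.TieredMergePolicy;

class TieredSettingsSketch {
    static void apply(TieredMergePolicy mp, int maxMergeAtOnce, double segmentsPerTier) {
        if (!(segmentsPerTier >= maxMergeAtOnce)) {
            maxMergeAtOnce = Math.max(2, (int) segmentsPerTier); // must stay at least 2
        }
        mp.setSegmentsPerTier(segmentsPerTier);
        mp.setMaxMergeAtOnce(maxMergeAtOnce);
    }
}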
|
||||
|
||||
@Override
|
||||
public TieredMergePolicy getMergePolicy() {
|
||||
return mergePolicy;
|
||||
}
|
||||
|
||||
|
@ -114,95 +106,62 @@ public class TieredMergePolicyProvider extends AbstractMergePolicyProvider<Tiere
|
|||
class ApplySettings implements IndexSettingsService.Listener {
|
||||
@Override
|
||||
public void onRefreshSettings(Settings settings) {
|
||||
double expungeDeletesPctAllowed = settings.getAsDouble(INDEX_MERGE_POLICY_EXPUNGE_DELETES_ALLOWED, TieredMergePolicyProvider.this.forceMergeDeletesPctAllowed);
|
||||
if (expungeDeletesPctAllowed != TieredMergePolicyProvider.this.forceMergeDeletesPctAllowed) {
|
||||
logger.info("updating [expunge_deletes_allowed] from [{}] to [{}]", TieredMergePolicyProvider.this.forceMergeDeletesPctAllowed, expungeDeletesPctAllowed);
|
||||
TieredMergePolicyProvider.this.forceMergeDeletesPctAllowed = expungeDeletesPctAllowed;
|
||||
for (CustomTieredMergePolicyProvider policy : policies) {
|
||||
policy.setForceMergeDeletesPctAllowed(expungeDeletesPctAllowed);
|
||||
}
|
||||
double oldExpungeDeletesPctAllowed = mergePolicy.getForceMergeDeletesPctAllowed();
|
||||
double expungeDeletesPctAllowed = settings.getAsDouble(INDEX_MERGE_POLICY_EXPUNGE_DELETES_ALLOWED, DEFAULT_EXPUNGE_DELETES_ALLOWED);
|
||||
if (expungeDeletesPctAllowed != oldExpungeDeletesPctAllowed) {
|
||||
logger.info("updating [expunge_deletes_allowed] from [{}] to [{}]", oldExpungeDeletesPctAllowed, expungeDeletesPctAllowed);
|
||||
mergePolicy.setForceMergeDeletesPctAllowed(expungeDeletesPctAllowed);
|
||||
}
|
||||
|
||||
ByteSizeValue floorSegment = settings.getAsBytesSize(INDEX_MERGE_POLICY_FLOOR_SEGMENT, TieredMergePolicyProvider.this.floorSegment);
|
||||
if (!floorSegment.equals(TieredMergePolicyProvider.this.floorSegment)) {
|
||||
logger.info("updating [floor_segment] from [{}] to [{}]", TieredMergePolicyProvider.this.floorSegment, floorSegment);
|
||||
TieredMergePolicyProvider.this.floorSegment = floorSegment;
|
||||
for (CustomTieredMergePolicyProvider policy : policies) {
|
||||
policy.setFloorSegmentMB(floorSegment.mbFrac());
|
||||
}
|
||||
double oldFloorSegmentMB = mergePolicy.getFloorSegmentMB();
|
||||
ByteSizeValue floorSegment = settings.getAsBytesSize(INDEX_MERGE_POLICY_FLOOR_SEGMENT, DEFAULT_FLOOR_SEGMENT);
|
||||
if (floorSegment.mbFrac() != oldFloorSegmentMB) {
|
||||
logger.info("updating [floor_segment] from [{}mb] to [{}]", oldFloorSegmentMB, floorSegment);
|
||||
mergePolicy.setFloorSegmentMB(floorSegment.mbFrac());
|
||||
}
|
||||
|
||||
int maxMergeAtOnce = settings.getAsInt(INDEX_MERGE_POLICY_MAX_MERGE_AT_ONCE, TieredMergePolicyProvider.this.maxMergeAtOnce);
|
||||
if (maxMergeAtOnce != TieredMergePolicyProvider.this.maxMergeAtOnce) {
|
||||
logger.info("updating [max_merge_at_once] from [{}] to [{}]", TieredMergePolicyProvider.this.maxMergeAtOnce, maxMergeAtOnce);
|
||||
TieredMergePolicyProvider.this.maxMergeAtOnce = maxMergeAtOnce;
|
||||
for (CustomTieredMergePolicyProvider policy : policies) {
|
||||
policy.setMaxMergeAtOnce(maxMergeAtOnce);
|
||||
}
|
||||
double oldSegmentsPerTier = mergePolicy.getSegmentsPerTier();
|
||||
double segmentsPerTier = settings.getAsDouble(INDEX_MERGE_POLICY_SEGMENTS_PER_TIER, DEFAULT_SEGMENTS_PER_TIER);
|
||||
if (segmentsPerTier != oldSegmentsPerTier) {
|
||||
logger.info("updating [segments_per_tier] from [{}] to [{}]", oldSegmentsPerTier, segmentsPerTier);
|
||||
mergePolicy.setSegmentsPerTier(segmentsPerTier);
|
||||
}
|
||||
|
||||
int maxMergeAtOnceExplicit = settings.getAsInt(INDEX_MERGE_POLICY_MAX_MERGE_AT_ONCE_EXPLICIT, TieredMergePolicyProvider.this.maxMergeAtOnceExplicit);
|
||||
if (maxMergeAtOnceExplicit != TieredMergePolicyProvider.this.maxMergeAtOnceExplicit) {
|
||||
logger.info("updating [max_merge_at_once_explicit] from [{}] to [{}]", TieredMergePolicyProvider.this.maxMergeAtOnceExplicit, maxMergeAtOnceExplicit);
|
||||
TieredMergePolicyProvider.this.maxMergeAtOnceExplicit = maxMergeAtOnceExplicit;
|
||||
for (CustomTieredMergePolicyProvider policy : policies) {
|
||||
policy.setMaxMergeAtOnceExplicit(maxMergeAtOnceExplicit);
|
||||
}
|
||||
int oldMaxMergeAtOnce = mergePolicy.getMaxMergeAtOnce();
|
||||
int maxMergeAtOnce = settings.getAsInt(INDEX_MERGE_POLICY_MAX_MERGE_AT_ONCE, DEFAULT_MAX_MERGE_AT_ONCE);
|
||||
if (maxMergeAtOnce != oldMaxMergeAtOnce) {
|
||||
logger.info("updating [max_merge_at_once] from [{}] to [{}]", oldMaxMergeAtOnce, maxMergeAtOnce);
|
||||
maxMergeAtOnce = adjustMaxMergeAtOnceIfNeeded(maxMergeAtOnce, segmentsPerTier);
|
||||
mergePolicy.setMaxMergeAtOnce(maxMergeAtOnce);
|
||||
}
|
||||
|
||||
ByteSizeValue maxMergedSegment = settings.getAsBytesSize(INDEX_MERGE_POLICY_MAX_MERGED_SEGMENT, TieredMergePolicyProvider.this.maxMergedSegment);
|
||||
if (!maxMergedSegment.equals(TieredMergePolicyProvider.this.maxMergedSegment)) {
|
||||
logger.info("updating [max_merged_segment] from [{}] to [{}]", TieredMergePolicyProvider.this.maxMergedSegment, maxMergedSegment);
|
||||
TieredMergePolicyProvider.this.maxMergedSegment = maxMergedSegment;
|
||||
for (CustomTieredMergePolicyProvider policy : policies) {
|
||||
policy.setFloorSegmentMB(maxMergedSegment.mbFrac());
|
||||
}
|
||||
int oldMaxMergeAtOnceExplicit = mergePolicy.getMaxMergeAtOnceExplicit();
|
||||
int maxMergeAtOnceExplicit = settings.getAsInt(INDEX_MERGE_POLICY_MAX_MERGE_AT_ONCE_EXPLICIT, DEFAULT_MAX_MERGE_AT_ONCE_EXPLICIT);
|
||||
if (maxMergeAtOnceExplicit != oldMaxMergeAtOnceExplicit) {
|
||||
logger.info("updating [max_merge_at_once_explicit] from [{}] to [{}]", oldMaxMergeAtOnceExplicit, maxMergeAtOnceExplicit);
|
||||
mergePolicy.setMaxMergeAtOnceExplicit(maxMergeAtOnceExplicit);
|
||||
}
|
||||
|
||||
double segmentsPerTier = settings.getAsDouble(INDEX_MERGE_POLICY_SEGMENTS_PER_TIER, TieredMergePolicyProvider.this.segmentsPerTier);
|
||||
if (segmentsPerTier != TieredMergePolicyProvider.this.segmentsPerTier) {
|
||||
logger.info("updating [segments_per_tier] from [{}] to [{}]", TieredMergePolicyProvider.this.segmentsPerTier, segmentsPerTier);
|
||||
TieredMergePolicyProvider.this.segmentsPerTier = segmentsPerTier;
|
||||
for (CustomTieredMergePolicyProvider policy : policies) {
|
||||
policy.setSegmentsPerTier(segmentsPerTier);
|
||||
}
|
||||
double oldMaxMergedSegmentMB = mergePolicy.getMaxMergedSegmentMB();
|
||||
ByteSizeValue maxMergedSegment = settings.getAsBytesSize(INDEX_MERGE_POLICY_MAX_MERGED_SEGMENT, DEFAULT_MAX_MERGED_SEGMENT);
|
||||
if (maxMergedSegment.mbFrac() != oldMaxMergedSegmentMB) {
|
||||
logger.info("updating [max_merged_segment] from [{}mb] to [{}]", oldMaxMergedSegmentMB, maxMergedSegment);
|
||||
mergePolicy.setMaxMergedSegmentMB(maxMergedSegment.mbFrac());
|
||||
}
|
||||
|
||||
double reclaimDeletesWeight = settings.getAsDouble(INDEX_MERGE_POLICY_RECLAIM_DELETES_WEIGHT, TieredMergePolicyProvider.this.reclaimDeletesWeight);
|
||||
if (reclaimDeletesWeight != TieredMergePolicyProvider.this.reclaimDeletesWeight) {
|
||||
logger.info("updating [reclaim_deletes_weight] from [{}] to [{}]", TieredMergePolicyProvider.this.reclaimDeletesWeight, reclaimDeletesWeight);
|
||||
TieredMergePolicyProvider.this.reclaimDeletesWeight = reclaimDeletesWeight;
|
||||
for (CustomTieredMergePolicyProvider policy : policies) {
|
||||
policy.setReclaimDeletesWeight(reclaimDeletesWeight);
|
||||
}
|
||||
double oldReclaimDeletesWeight = mergePolicy.getReclaimDeletesWeight();
|
||||
double reclaimDeletesWeight = settings.getAsDouble(INDEX_MERGE_POLICY_RECLAIM_DELETES_WEIGHT, DEFAULT_RECLAIM_DELETES_WEIGHT);
|
||||
if (reclaimDeletesWeight != oldReclaimDeletesWeight) {
|
||||
logger.info("updating [reclaim_deletes_weight] from [{}] to [{}]", oldReclaimDeletesWeight, reclaimDeletesWeight);
|
||||
mergePolicy.setReclaimDeletesWeight(reclaimDeletesWeight);
|
||||
}
|
||||
|
||||
final double noCFSRatio = parseNoCFSRatio(settings.get(INDEX_COMPOUND_FORMAT, Double.toString(TieredMergePolicyProvider.this.noCFSRatio)));
|
||||
double noCFSRatio = parseNoCFSRatio(settings.get(INDEX_COMPOUND_FORMAT, Double.toString(TieredMergePolicyProvider.this.noCFSRatio)));
|
||||
if (noCFSRatio != TieredMergePolicyProvider.this.noCFSRatio) {
|
||||
logger.info("updating index.compound_format from [{}] to [{}]", formatNoCFSRatio(TieredMergePolicyProvider.this.noCFSRatio), formatNoCFSRatio(noCFSRatio));
|
||||
mergePolicy.setNoCFSRatio(noCFSRatio);
|
||||
TieredMergePolicyProvider.this.noCFSRatio = noCFSRatio;
|
||||
for (CustomTieredMergePolicyProvider policy : policies) {
|
||||
policy.setNoCFSRatio(noCFSRatio);
|
||||
}
|
||||
}
|
||||
|
||||
fixSettingsIfNeeded();
|
||||
}
|
||||
}
|
||||
|
||||
public static class CustomTieredMergePolicyProvider extends TieredMergePolicy {
|
||||
|
||||
private final TieredMergePolicyProvider provider;
|
||||
|
||||
public CustomTieredMergePolicyProvider(TieredMergePolicyProvider provider) {
|
||||
super();
|
||||
this.provider = provider;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() {
|
||||
super.close();
|
||||
provider.policies.remove(this);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -27,8 +27,7 @@ import org.apache.lucene.queries.ExtendedCommonTermsQuery;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util.BytesRefBuilder;
import org.elasticsearch.ElasticsearchIllegalArgumentException;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.xcontent.XContentParser;

@ -211,11 +210,11 @@ public class CommonTermsQueryParser implements QueryParser {
        try (TokenStream source = analyzer.tokenStream(field, queryString.toString())) {
            source.reset();
            CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
            BytesRefBuilder builder = new BytesRefBuilder();
            while (source.incrementToken()) {
                BytesRef ref = new BytesRef(termAtt.length() * 4); // oversize for
                                                                   // UTF-8
                UnicodeUtil.UTF16toUTF8(termAtt.buffer(), 0, termAtt.length(), ref);
                query.add(new Term(field, ref));
                builder.copyChars(termAtt);
                query.add(new Term(field, builder.toBytesRef()));
                count++;
            }
        }

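Replacing the hand-rolled UnicodeUtil.UTF16toUTF8 copy with BytesRefBuilder.copyChars is the recurring theme of the Lucene 4.10 changes in this commit. A small self-contained sketch of the new idiom (the analyzer, field name and text are placeholder values):

// Minimal sketch: BytesRefBuilder handles the UTF-16 -> UTF-8 encoding and the
// buffer growing that used to be done by hand with UnicodeUtil.
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.index.Term;
import org.apache.lucene.util.BytesRefBuilder;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

class TermsFromAnalyzer {
    static List<Term> terms(Analyzer analyzer, String field, String text) throws IOException {
        List<Term> terms = new ArrayList<>();
        BytesRefBuilder builder = new BytesRefBuilder();       // reused scratch buffer
        try (TokenStream source = analyzer.tokenStream(field, text)) {
            CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
            source.reset();
            while (source.incrementToken()) {
                builder.copyChars(termAtt);                     // encode term, growing as needed
                terms.add(new Term(field, builder.toBytesRef())); // private copy per term
            }
            source.end();
        }
        return terms;
    }
}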
@ -23,10 +23,7 @@ import org.apache.lucene.queries.TermFilter;
|
|||
import org.apache.lucene.search.DocIdSet;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.search.Filter;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
import org.apache.lucene.util.LongBitSet;
|
||||
import org.apache.lucene.util.*;
|
||||
import org.elasticsearch.common.lease.Releasables;
|
||||
import org.elasticsearch.common.lucene.search.AndFilter;
|
||||
import org.elasticsearch.common.util.BytesRefHash;
|
||||
|
@ -131,7 +128,7 @@ final class ParentIdsFilter extends Filter {
|
|||
}
|
||||
|
||||
TermsEnum termsEnum = terms.iterator(null);
|
||||
BytesRef uidSpare = new BytesRef();
|
||||
BytesRefBuilder uidSpare = new BytesRefBuilder();
|
||||
BytesRef idSpare = new BytesRef();
|
||||
|
||||
if (acceptDocs == null) {
|
||||
|
@ -148,8 +145,8 @@ final class ParentIdsFilter extends Filter {
|
|||
long size = parentIds.size();
|
||||
for (int i = 0; i < size; i++) {
|
||||
parentIds.get(i, idSpare);
|
||||
Uid.createUidAsBytes(parentTypeBr, idSpare, uidSpare);
|
||||
if (termsEnum.seekExact(uidSpare)) {
|
||||
BytesRef uid = Uid.createUidAsBytes(parentTypeBr, idSpare, uidSpare);
|
||||
if (termsEnum.seekExact(uid)) {
|
||||
int docId;
|
||||
docsEnum = termsEnum.docs(acceptDocs, docsEnum, DocsEnum.FLAG_NONE);
|
||||
if (result == null) {
|
||||
|
|
|
@ -19,8 +19,10 @@
|
|||
|
||||
package org.elasticsearch.index.store;
|
||||
|
||||
import org.apache.lucene.store.*;
|
||||
import org.elasticsearch.Version;
|
||||
import org.apache.lucene.store.CompoundFileDirectory;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.FileSwitchDirectory;
|
||||
import org.apache.lucene.store.FilterDirectory;
|
||||
import org.elasticsearch.common.Nullable;
|
||||
|
||||
/**
|
||||
|
@ -28,10 +30,6 @@ import org.elasticsearch.common.Nullable;
|
|||
*/
|
||||
public final class DirectoryUtils {
|
||||
|
||||
static {
|
||||
assert Version.CURRENT.luceneVersion == org.apache.lucene.util.Version.LUCENE_4_9 : "Remove the special case for NRTCachingDirectory - it implements FilterDirectory in 4.10";
|
||||
}
|
||||
|
||||
private DirectoryUtils() {} // no instance
|
||||
|
||||
/**
|
||||
|
@ -60,8 +58,6 @@ public final class DirectoryUtils {
|
|||
while (true) {
|
||||
if ((current instanceof FilterDirectory)) {
|
||||
current = ((FilterDirectory) current).getDelegate();
|
||||
} else if (current instanceof NRTCachingDirectory) { // remove this when we upgrade to Lucene 4.10
|
||||
current = ((NRTCachingDirectory) current).getDelegate();
|
||||
} else {
|
||||
break;
|
||||
}
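With Lucene 4.10, NRTCachingDirectory implements FilterDirectory, which is why the special case above (and the accompanying assert) can be removed. The unwrap loop then reduces to a single instanceof check, roughly:

// Sketch of the simplified unwrap loop once every wrapper extends FilterDirectory.
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FilterDirectory;

class UnwrapSketch {
    static Directory innermost(Directory dir) {
        Directory current = dir;
        while (current instanceof FilterDirectory) {
            current = ((FilterDirectory) current).getDelegate();
        }
        return current;
    }
}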
|
||||
|
|
|
@ -482,8 +482,8 @@ public class Store extends AbstractIndexShardComponent implements CloseableIndex
|
|||
}
|
||||
Version maxVersion = Version.LUCENE_3_0; // we don't know which version was used to write so we take the max version.
|
||||
for (SegmentCommitInfo info : segmentCommitInfos) {
|
||||
final Version version = Lucene.parseVersionLenient(info.info.getVersion(), Version.LUCENE_3_0);
|
||||
if (version.onOrAfter(maxVersion)) {
|
||||
final Version version = info.info.getVersion();
|
||||
if (version != null && version.onOrAfter(maxVersion)) {
|
||||
maxVersion = version;
|
||||
}
|
||||
for (String file : info.files()) {
|
||||
|
|
|
@ -137,7 +137,7 @@ public class StoreFileMetaData implements Streamable {
        out.writeVLong(length);
        out.writeOptionalString(checksum);
        if (out.getVersion().onOrAfter(org.elasticsearch.Version.V_1_3_0)) {
            out.writeOptionalString(writtenBy == null ? null : writtenBy.name());
            out.writeOptionalString(writtenBy == null ? null : writtenBy.toString());
        }
        if (out.getVersion().onOrAfter(org.elasticsearch.Version.V_1_4_0)) {
            out.writeBytesRef(hash);

@ -117,7 +117,7 @@ public final class RecoveryFileChunkRequest extends TransportRequest { // publi
        out.writeOptionalString(metaData.checksum());
        out.writeBytesReference(content);
        if (out.getVersion().onOrAfter(org.elasticsearch.Version.V_1_3_0)) {
            out.writeOptionalString(metaData.writtenBy() == null ? null : metaData.writtenBy().name());
            out.writeOptionalString(metaData.writtenBy() == null ? null : metaData.writtenBy().toString());
        }
    }

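The name() to toString() switch follows from Lucene 4.10 turning Version from an enum into a plain class: the string form now comes from toString() and is read back with parseLeniently(), which still accepts the old enum-style names. A hedged sketch of that round trip (assuming the 4.10 org.apache.lucene.util.Version API):

// Illustrative only: serialize a Lucene version as a string and parse it back.
import org.apache.lucene.util.Version;

class VersionRoundTrip {
    public static void main(String[] args) throws Exception {
        Version writtenBy = Version.LUCENE_4_10_0;
        String wire = writtenBy.toString();              // "4.10.0"
        Version parsed = Version.parseLeniently(wire);   // also accepts "LUCENE_4_9"-style names
        System.out.println(parsed.onOrAfter(writtenBy)); // true
    }
}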
@ -21,7 +21,7 @@ package org.elasticsearch.script.expression;
|
|||
|
||||
import org.apache.lucene.expressions.Bindings;
|
||||
import org.apache.lucene.expressions.Expression;
|
||||
import org.apache.lucene.expressions.XSimpleBindings;
|
||||
import org.apache.lucene.expressions.SimpleBindings;
|
||||
import org.apache.lucene.index.AtomicReaderContext;
|
||||
import org.apache.lucene.queries.function.FunctionValues;
|
||||
import org.apache.lucene.queries.function.ValueSource;
|
||||
|
@ -39,7 +39,7 @@ import java.util.Map;
|
|||
class ExpressionScript implements SearchScript {
|
||||
|
||||
final Expression expression;
|
||||
final XSimpleBindings bindings;
|
||||
final SimpleBindings bindings;
|
||||
final ValueSource source;
|
||||
final ReplaceableConstValueSource specialValue; // _value
|
||||
Map<String, Scorer> context;
|
||||
|
@ -47,7 +47,7 @@ class ExpressionScript implements SearchScript {
|
|||
FunctionValues values;
|
||||
int docid;
|
||||
|
||||
ExpressionScript(Expression e, XSimpleBindings b, ReplaceableConstValueSource v) {
|
||||
ExpressionScript(Expression e, SimpleBindings b, ReplaceableConstValueSource v) {
|
||||
expression = e;
|
||||
bindings = b;
|
||||
context = Collections.EMPTY_MAP;
|
||||
|
|
|
@ -20,9 +20,9 @@
|
|||
package org.elasticsearch.script.expression;
|
||||
|
||||
import org.apache.lucene.expressions.Expression;
|
||||
import org.apache.lucene.expressions.XSimpleBindings;
|
||||
import org.apache.lucene.expressions.js.XJavascriptCompiler;
|
||||
import org.apache.lucene.expressions.js.XVariableContext;
|
||||
import org.apache.lucene.expressions.SimpleBindings;
|
||||
import org.apache.lucene.expressions.js.JavascriptCompiler;
|
||||
import org.apache.lucene.expressions.js.VariableContext;
|
||||
import org.apache.lucene.queries.function.valuesource.DoubleConstValueSource;
|
||||
import org.apache.lucene.search.SortField;
|
||||
import org.elasticsearch.common.Nullable;
|
||||
|
@ -71,7 +71,7 @@ public class ExpressionScriptEngineService extends AbstractComponent implements
    public Object compile(String script) {
        try {
            // NOTE: validation is delayed to allow runtime vars, and we don't have access to per index stuff here
            return XJavascriptCompiler.compile(script);
            return JavascriptCompiler.compile(script);
        } catch (ParseException e) {
            throw new ExpressionScriptCompilationException("Failed to parse expression: " + script, e);
        }

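Dropping the forked XJavascriptCompiler means the module now runs against the stock Lucene expressions API. A hedged, standalone sketch of that API, with made-up field names, compiling a script and turning it into a SortField via SimpleBindings:

// Sketch of the stock Lucene 4.10 expressions API; "popularity" is an example field.
import org.apache.lucene.expressions.Expression;
import org.apache.lucene.expressions.SimpleBindings;
import org.apache.lucene.expressions.js.JavascriptCompiler;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;

class ExpressionSortSketch {
    static Sort popularityBoostedSort() throws java.text.ParseException {
        Expression expr = JavascriptCompiler.compile("_score * sqrt(popularity)");
        SimpleBindings bindings = new SimpleBindings();
        bindings.add(new SortField("_score", SortField.Type.SCORE));
        bindings.add(new SortField("popularity", SortField.Type.LONG));
        return new Sort(expr.getSortField(bindings, true)); // reverse = descending
    }
}

compile() only parses the script; variables are resolved later when bindings are supplied, which is why the NOTE above about delayed validation still holds.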
@ -83,7 +83,7 @@ public class ExpressionScriptEngineService extends AbstractComponent implements
|
|||
MapperService mapper = lookup.doc().mapperService();
|
||||
// NOTE: if we need to do anything complicated with bindings in the future, we can just extend Bindings,
|
||||
// instead of complicating SimpleBindings (which should stay simple)
|
||||
XSimpleBindings bindings = new XSimpleBindings();
|
||||
SimpleBindings bindings = new SimpleBindings();
|
||||
ReplaceableConstValueSource specialValue = null;
|
||||
|
||||
for (String variable : expr.variables) {
|
||||
|
@ -109,14 +109,14 @@ public class ExpressionScriptEngineService extends AbstractComponent implements
|
|||
}
|
||||
|
||||
} else {
|
||||
XVariableContext[] parts = XVariableContext.parse(variable);
|
||||
VariableContext[] parts = VariableContext.parse(variable);
|
||||
if (parts[0].text.equals("doc") == false) {
|
||||
throw new ExpressionScriptCompilationException("Unknown variable [" + parts[0].text + "] in expression");
|
||||
}
|
||||
if (parts.length < 2 || parts[1].type != XVariableContext.Type.STR_INDEX) {
|
||||
if (parts.length < 2 || parts[1].type != VariableContext.Type.STR_INDEX) {
|
||||
throw new ExpressionScriptCompilationException("Variable 'doc' in expression must be used with a specific field like: doc['myfield'].value");
|
||||
}
|
||||
if (parts.length < 3 || parts[2].type != XVariableContext.Type.MEMBER || parts[2].text.equals("value") == false) {
|
||||
if (parts.length < 3 || parts[2].type != VariableContext.Type.MEMBER || parts[2].text.equals("value") == false) {
|
||||
throw new ExpressionScriptCompilationException("Invalid member for field data in expression. Only '.value' is currently supported.");
|
||||
}
|
||||
String fieldname = parts[1].text;
|
||||
|
|
|
@ -23,6 +23,7 @@ package org.elasticsearch.search;
|
|||
import org.apache.lucene.index.*;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BytesRefBuilder;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
import org.elasticsearch.ElasticsearchIllegalArgumentException;
|
||||
import org.elasticsearch.index.fielddata.FieldData;
|
||||
|
@ -439,7 +440,7 @@ public enum MultiValueMode {
|
|||
*/
|
||||
public NumericDocValues select(final SortedNumericDocValues values, final long missingValue, final FixedBitSet rootDocs, final FixedBitSet innerDocs, int maxDoc) {
|
||||
if (rootDocs == null || innerDocs == null) {
|
||||
return select(FieldData.emptySortedNumeric(maxDoc), missingValue);
|
||||
return select(DocValues.emptySortedNumeric(maxDoc), missingValue);
|
||||
}
|
||||
return new NumericDocValues() {
|
||||
|
||||
|
@ -625,7 +626,7 @@ public enum MultiValueMode {
|
|||
}
|
||||
return new BinaryDocValues() {
|
||||
|
||||
final BytesRef spare = new BytesRef();
|
||||
final BytesRefBuilder spare = new BytesRefBuilder();
|
||||
|
||||
@Override
|
||||
public BytesRef get(int rootDoc) {
|
||||
|
@ -637,7 +638,7 @@ public enum MultiValueMode {
|
|||
final int prevRootDoc = rootDocs.prevSetBit(rootDoc - 1);
|
||||
final int firstNestedDoc = innerDocs.nextSetBit(prevRootDoc + 1);
|
||||
|
||||
BytesRef accumulated = null;
|
||||
BytesRefBuilder accumulated = null;
|
||||
|
||||
for (int doc = firstNestedDoc; doc != -1 && doc < rootDoc; doc = innerDocs.nextSetBit(doc + 1)) {
|
||||
values.setDocument(doc);
|
||||
|
@ -647,7 +648,7 @@ public enum MultiValueMode {
|
|||
spare.copyBytes(innerValue);
|
||||
accumulated = spare;
|
||||
} else {
|
||||
final BytesRef applied = apply(accumulated, innerValue);
|
||||
final BytesRef applied = apply(accumulated.get(), innerValue);
|
||||
if (applied == innerValue) {
|
||||
accumulated.copyBytes(innerValue);
|
||||
}
|
||||
|
@ -655,7 +656,7 @@ public enum MultiValueMode {
|
|||
}
|
||||
}
|
||||
|
||||
return accumulated == null ? missingValue : accumulated;
|
||||
return accumulated == null ? missingValue : accumulated.get();
|
||||
}
|
||||
};
|
||||
}
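The switch from a raw BytesRef accumulator to a BytesRefBuilder matters because doc-values iterators may reuse the BytesRef instances they hand out, so the running result has to own its bytes. An illustrative sketch of the same accumulation pattern outside MultiValueMode (not the actual code):

// Sketch: accumulate the max value across possibly shared/reused BytesRef instances
// by copying into a builder-owned buffer instead of keeping a reference.
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;

class MaxOfValues {
    /** Returns the largest value copied into an owned buffer, or null if empty. */
    static BytesRef max(BytesRef[] sharedValues) {
        BytesRefBuilder accumulated = null;
        BytesRefBuilder spare = new BytesRefBuilder();
        for (BytesRef innerValue : sharedValues) {
            if (accumulated == null) {
                spare.copyBytes(innerValue);                 // take ownership of the bytes
                accumulated = spare;
            } else if (innerValue.compareTo(accumulated.get()) > 0) {
                accumulated.copyBytes(innerValue);
            }
        }
        return accumulated == null ? null : accumulated.get();
    }
}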
|
||||
|
|
|
@ -20,8 +20,8 @@
|
|||
package org.elasticsearch.search.aggregations;
|
||||
|
||||
import org.apache.lucene.index.AtomicReaderContext;
|
||||
import org.apache.lucene.util.packed.AppendingDeltaPackedLongBuffer;
|
||||
import org.apache.lucene.util.packed.AppendingPackedLongBuffer;
|
||||
import org.apache.lucene.util.packed.PackedInts;
|
||||
import org.apache.lucene.util.packed.PackedLongValues;
|
||||
import org.elasticsearch.ElasticsearchException;
|
||||
import org.elasticsearch.search.aggregations.support.AggregationContext;
|
||||
|
||||
|
@ -42,8 +42,8 @@ public class RecordingPerReaderBucketCollector extends RecordingBucketCollector
|
|||
|
||||
static class PerSegmentCollects {
|
||||
AtomicReaderContext readerContext;
|
||||
AppendingPackedLongBuffer docs;
|
||||
AppendingPackedLongBuffer buckets;
|
||||
PackedLongValues.Builder docs;
|
||||
PackedLongValues.Builder buckets;
|
||||
int lastDocId = 0;
|
||||
|
||||
PerSegmentCollects(AtomicReaderContext readerContext) {
|
||||
|
@ -54,7 +54,7 @@ public class RecordingPerReaderBucketCollector extends RecordingBucketCollector
|
|||
if (docs == null) {
|
||||
// TODO unclear what might be reasonable constructor args to pass to this collection
|
||||
// No way of accurately predicting how many docs will be collected
|
||||
docs = new AppendingPackedLongBuffer();
|
||||
docs = PackedLongValues.packedBuilder(PackedInts.COMPACT);
|
||||
}
|
||||
// Store as delta-encoded for better compression
|
||||
docs.add(doc - lastDocId);
|
||||
|
@ -63,7 +63,7 @@ public class RecordingPerReaderBucketCollector extends RecordingBucketCollector
|
|||
if (owningBucketOrdinal != 0) {
|
||||
// Store all of the prior bucketOrds (which up until now have
|
||||
// all been zero based)
|
||||
buckets = new AppendingPackedLongBuffer();
|
||||
buckets = PackedLongValues.packedBuilder(PackedInts.COMPACT);
|
||||
for (int i = 0; i < docs.size() - 1; i++) {
|
||||
buckets.add(0);
|
||||
}
|
||||
|
@ -75,12 +75,6 @@ public class RecordingPerReaderBucketCollector extends RecordingBucketCollector
|
|||
}
|
||||
}
|
||||
void endCollect() {
|
||||
if (docs != null) {
|
||||
docs.freeze();
|
||||
}
|
||||
if (buckets != null) {
|
||||
buckets.freeze();
|
||||
}
|
||||
}
|
||||
|
||||
boolean hasItems() {
|
||||
|
@ -94,15 +88,15 @@ public class RecordingPerReaderBucketCollector extends RecordingBucketCollector
|
|||
return;
|
||||
}
|
||||
if (buckets == null) {
|
||||
final AppendingDeltaPackedLongBuffer.Iterator docsIter = docs.iterator();
|
||||
final PackedLongValues.Iterator docsIter = docs.build().iterator();
|
||||
while (docsIter.hasNext()) {
|
||||
lastDocId += (int) docsIter.next();
|
||||
collector.collect(lastDocId, 0);
|
||||
}
|
||||
} else {
|
||||
assert docs.size() == buckets.size();
|
||||
final AppendingDeltaPackedLongBuffer.Iterator docsIter = docs.iterator();
|
||||
final AppendingDeltaPackedLongBuffer.Iterator bucketsIter = buckets.iterator();
|
||||
final PackedLongValues.Iterator docsIter = docs.build().iterator();
|
||||
final PackedLongValues.Iterator bucketsIter = buckets.build().iterator();
|
||||
while (docsIter.hasNext()) {
|
||||
lastDocId += (int) docsIter.next();
|
||||
collector.collect(lastDocId, bucketsIter.next());
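AppendingPackedLongBuffer is gone in 4.10; its replacement is a PackedLongValues.Builder that is frozen with build() and then iterated, which also removes the explicit freeze() calls. A rough sketch of the delta-encoded doc-id recording used above, in isolation:

// Sketch of PackedLongValues: append deltas, build() an immutable structure, iterate.
import org.apache.lucene.util.packed.PackedInts;
import org.apache.lucene.util.packed.PackedLongValues;

class PackedDocIds {
    public static void main(String[] args) {
        PackedLongValues.Builder docs = PackedLongValues.packedBuilder(PackedInts.COMPACT);
        int lastDocId = 0;
        for (int doc : new int[] {3, 7, 42}) {
            docs.add(doc - lastDocId);         // delta-encode for better compression
            lastDocId = doc;
        }
        PackedLongValues.Iterator it = docs.build().iterator();
        int docId = 0;
        while (it.hasNext()) {
            docId += (int) it.next();
            System.out.println(docId);         // prints 3, 7, 42
        }
    }
}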
|
||||
|
|
|
@ -20,6 +20,7 @@ package org.elasticsearch.search.aggregations.bucket.terms;
|
|||
|
||||
import org.apache.lucene.index.AtomicReaderContext;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BytesRefBuilder;
|
||||
import org.elasticsearch.common.lease.Releasables;
|
||||
import org.elasticsearch.common.util.BytesRefHash;
|
||||
import org.elasticsearch.index.fielddata.SortedBinaryDocValues;
|
||||
|
@ -44,7 +45,7 @@ public class StringTermsAggregator extends AbstractStringTermsAggregator {
|
|||
protected final BytesRefHash bucketOrds;
|
||||
private final IncludeExclude includeExclude;
|
||||
private SortedBinaryDocValues values;
|
||||
private final BytesRef previous;
|
||||
private final BytesRefBuilder previous;
|
||||
|
||||
public StringTermsAggregator(String name, AggregatorFactories factories, ValuesSource valuesSource, long estimatedBucketCount,
|
||||
InternalOrder order, BucketCountThresholds bucketCountThresholds,
|
||||
|
@ -54,7 +55,7 @@ public class StringTermsAggregator extends AbstractStringTermsAggregator {
|
|||
this.valuesSource = valuesSource;
|
||||
this.includeExclude = includeExclude;
|
||||
bucketOrds = new BytesRefHash(estimatedBucketCount, aggregationContext.bigArrays());
|
||||
previous = new BytesRef();
|
||||
previous = new BytesRefBuilder();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -74,13 +75,13 @@ public class StringTermsAggregator extends AbstractStringTermsAggregator {
|
|||
final int valuesCount = values.count();
|
||||
|
||||
// SortedBinaryDocValues don't guarantee uniqueness so we need to take care of dups
|
||||
previous.length = 0;
|
||||
previous.clear();
|
||||
for (int i = 0; i < valuesCount; ++i) {
|
||||
final BytesRef bytes = values.valueAt(i);
|
||||
if (includeExclude != null && !includeExclude.accept(bytes)) {
|
||||
continue;
|
||||
}
|
||||
if (previous.equals(bytes)) {
|
||||
if (previous.get().equals(bytes)) {
|
||||
continue;
|
||||
}
|
||||
long bucketOrdinal = bucketOrds.add(bytes);
|
||||
|
|
|
@ -21,9 +21,8 @@ package org.elasticsearch.search.aggregations.bucket.terms.support;
|
|||
import org.apache.lucene.index.RandomAccessOrds;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.CharsRef;
|
||||
import org.apache.lucene.util.CharsRefBuilder;
|
||||
import org.apache.lucene.util.LongBitSet;
|
||||
import org.apache.lucene.util.UnicodeUtil;
|
||||
import org.elasticsearch.ExceptionsHelper;
|
||||
import org.elasticsearch.common.regex.Regex;
|
||||
import org.elasticsearch.common.xcontent.XContentParser;
|
||||
|
@ -43,7 +42,7 @@ public class IncludeExclude {
|
|||
|
||||
private final Matcher include;
|
||||
private final Matcher exclude;
|
||||
private final CharsRef scratch = new CharsRef();
|
||||
private final CharsRefBuilder scratch = new CharsRefBuilder();
|
||||
|
||||
/**
|
||||
* @param include The regular expression pattern for the terms to be included
|
||||
|
@ -61,18 +60,18 @@ public class IncludeExclude {
|
|||
* Returns whether the given value is accepted based on the {@code include} & {@code exclude} patterns.
|
||||
*/
|
||||
public boolean accept(BytesRef value) {
|
||||
UnicodeUtil.UTF8toUTF16(value, scratch);
|
||||
scratch.copyUTF8Bytes(value);
|
||||
if (include == null) {
|
||||
// exclude must not be null
|
||||
return !exclude.reset(scratch).matches();
|
||||
return !exclude.reset(scratch.get()).matches();
|
||||
}
|
||||
if (!include.reset(scratch).matches()) {
|
||||
if (!include.reset(scratch.get()).matches()) {
|
||||
return false;
|
||||
}
|
||||
if (exclude == null) {
|
||||
return true;
|
||||
}
|
||||
return !exclude.reset(scratch).matches();
|
||||
return !exclude.reset(scratch.get()).matches();
|
||||
}
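CharsRefBuilder.copyUTF8Bytes replaces the old UnicodeUtil.UTF8toUTF16 call and takes care of sizing the char buffer itself. A compact sketch of the accept() logic with hypothetical include/exclude patterns:

// Sketch only: decode the term once into a reusable char buffer, then reset the matchers on it.
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRefBuilder;

class IncludeExcludeSketch {
    private final Matcher include = Pattern.compile("foo.*").matcher("");  // example patterns
    private final Matcher exclude = Pattern.compile(".*bar").matcher("");
    private final CharsRefBuilder scratch = new CharsRefBuilder();

    boolean accept(BytesRef value) {
        scratch.copyUTF8Bytes(value);                 // UTF-8 term bytes -> UTF-16 chars
        return include.reset(scratch.get()).matches()
                && !exclude.reset(scratch.get()).matches();
    }
}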
|
||||
|
||||
/**
|
||||
|
|
|
@ -45,7 +45,7 @@ public class ScriptBytesValues extends SortingBinaryDocValues implements ScriptV
|
|||
|
||||
private void set(int i, Object o) {
|
||||
if (o == null) {
|
||||
values[i].length = 0;
|
||||
values[i].clear();
|
||||
} else {
|
||||
values[i].copyChars(o.toString());
|
||||
}
|
||||
|
|
|
@ -20,7 +20,8 @@
|
|||
package org.elasticsearch.search.lookup;
|
||||
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.IntsRef;
|
||||
import org.apache.lucene.util.BytesRefBuilder;
|
||||
import org.apache.lucene.util.IntsRefBuilder;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Iterator;
|
||||
|
@ -36,17 +37,19 @@ public class CachedPositionIterator extends PositionIterator {
|
|||
|
||||
// all payloads of the term in the current document in one bytes array.
|
||||
// payloadStarts and payloadLength mark the start and end of one payload.
|
||||
final BytesRef payloads = new BytesRef();
|
||||
final BytesRefBuilder payloads = new BytesRefBuilder();
|
||||
|
||||
final IntsRef payloadsLengths = new IntsRef(0);
|
||||
final IntsRefBuilder payloadsLengths = new IntsRefBuilder();
|
||||
|
||||
final IntsRef payloadsStarts = new IntsRef(0);
|
||||
final IntsRefBuilder payloadsStarts = new IntsRefBuilder();
|
||||
|
||||
final IntsRef positions = new IntsRef(0);
|
||||
final IntsRefBuilder positions = new IntsRefBuilder();
|
||||
|
||||
final IntsRef startOffsets = new IntsRef(0);
|
||||
final IntsRefBuilder startOffsets = new IntsRefBuilder();
|
||||
|
||||
final IntsRef endOffsets = new IntsRef(0);
|
||||
final IntsRefBuilder endOffsets = new IntsRefBuilder();
|
||||
|
||||
final BytesRef payload = new BytesRef();
|
||||
|
||||
@Override
|
||||
public Iterator<TermPosition> reset() {
|
||||
|
@ -61,12 +64,13 @@ public class CachedPositionIterator extends PositionIterator {
|
|||
|
||||
@Override
|
||||
public TermPosition next() {
|
||||
termPosition.position = positions.ints[pos];
|
||||
termPosition.startOffset = startOffsets.ints[pos];
|
||||
termPosition.endOffset = endOffsets.ints[pos];
|
||||
termPosition.payload = payloads;
|
||||
payloads.offset = payloadsStarts.ints[pos];
|
||||
payloads.length = payloadsLengths.ints[pos];
|
||||
termPosition.position = positions.intAt(pos);
|
||||
termPosition.startOffset = startOffsets.intAt(pos);
|
||||
termPosition.endOffset = endOffsets.intAt(pos);
|
||||
termPosition.payload = payload;
|
||||
payload.bytes = payloads.bytes();
|
||||
payload.offset = payloadsStarts.intAt(pos);
|
||||
payload.length = payloadsLengths.intAt(pos);
|
||||
pos++;
|
||||
return termPosition;
|
||||
}
|
||||
|
@ -82,44 +86,34 @@ public class CachedPositionIterator extends PositionIterator {
|
|||
TermPosition termPosition;
|
||||
for (int i = 0; i < freq; i++) {
|
||||
termPosition = super.next();
|
||||
positions.ints[i] = termPosition.position;
|
||||
positions.setIntAt(i, termPosition.position);
|
||||
addPayload(i, termPosition.payload);
|
||||
startOffsets.ints[i] = termPosition.startOffset;
|
||||
endOffsets.ints[i] = termPosition.endOffset;
|
||||
startOffsets.setIntAt(i, termPosition.startOffset);
|
||||
endOffsets.setIntAt(i, termPosition.endOffset);
|
||||
}
|
||||
}
|
||||
private void ensureSize(int freq) {
|
||||
if (freq == 0) {
|
||||
return;
|
||||
}
|
||||
if (startOffsets.ints.length < freq) {
|
||||
startOffsets.grow(freq);
|
||||
endOffsets.grow(freq);
|
||||
positions.grow(freq);
|
||||
payloadsLengths.grow(freq);
|
||||
payloadsStarts.grow(freq);
|
||||
}
|
||||
payloads.offset = 0;
|
||||
payloadsLengths.offset = 0;
|
||||
payloadsStarts.offset = 0;
|
||||
startOffsets.grow(freq);
|
||||
endOffsets.grow(freq);
|
||||
positions.grow(freq);
|
||||
payloadsLengths.grow(freq);
|
||||
payloadsStarts.grow(freq);
|
||||
payloads.grow(freq * 8);// this is just a guess....
|
||||
|
||||
}
|
||||
|
||||
private void addPayload(int i, BytesRef currPayload) {
|
||||
if (currPayload != null) {
|
||||
payloadsLengths.ints[i] = currPayload.length;
|
||||
payloadsStarts.ints[i] = i == 0 ? 0 : payloadsStarts.ints[i - 1] + payloadsLengths.ints[i - 1];
|
||||
if (payloads.bytes.length < payloadsStarts.ints[i] + payloadsLengths.ints[i]) {
|
||||
payloads.offset = 0; // the offset serves no purpose here. but
|
||||
// we must assure that it is 0 before
|
||||
// grow() is called
|
||||
payloads.grow(payloads.bytes.length * 2); // just a guess
|
||||
}
|
||||
System.arraycopy(currPayload.bytes, currPayload.offset, payloads.bytes, payloadsStarts.ints[i], currPayload.length);
|
||||
payloadsLengths.setIntAt(i, currPayload.length);
|
||||
payloadsStarts.setIntAt(i, i == 0 ? 0 : payloadsStarts.intAt(i - 1) + payloadsLengths.intAt(i - 1));
|
||||
payloads.grow(payloadsStarts.intAt(i) + currPayload.length);
|
||||
System.arraycopy(currPayload.bytes, currPayload.offset, payloads.bytes(), payloadsStarts.intAt(i), currPayload.length);
|
||||
} else {
|
||||
payloadsLengths.ints[i] = 0;
|
||||
payloadsStarts.ints[i] = i == 0 ? 0 : payloadsStarts.ints[i - 1] + payloadsLengths.ints[i - 1];
|
||||
payloadsLengths.setIntAt(i, 0);
|
||||
payloadsStarts.setIntAt(i, i == 0 ? 0 : payloadsStarts.intAt(i - 1) + payloadsLengths.intAt(i - 1));
|
||||
}
|
||||
}
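The wholesale move from IntsRef/BytesRef scratch fields to their Builder variants removes the manual offset and grow() bookkeeping seen above. A simplified sketch of the same payload-buffer idea using the builder APIs (not the CachedPositionIterator code itself):

// Sketch: append() grows the backing arrays as needed; intAt()/bytes() read back.
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.IntsRefBuilder;

class PayloadBuffer {
    private final IntsRefBuilder starts = new IntsRefBuilder();
    private final IntsRefBuilder lengths = new IntsRefBuilder();
    private final BytesRefBuilder bytes = new BytesRefBuilder();

    void add(BytesRef payload) {
        starts.append(bytes.length());
        lengths.append(payload == null ? 0 : payload.length);
        if (payload != null) {
            bytes.append(payload);                   // grows the byte[] automatically
        }
    }

    BytesRef payloadAt(int i, BytesRef spare) {      // fills a caller-provided scratch ref
        spare.bytes = bytes.bytes();
        spare.offset = starts.intAt(i);
        spare.length = lengths.intAt(i);
        return spare;
    }
}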
|
||||
|
||||
|
|
|
@ -21,8 +21,7 @@ package org.elasticsearch.search.lookup;
|
|||
|
||||
import org.apache.lucene.analysis.payloads.PayloadHelper;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.CharsRef;
|
||||
import org.apache.lucene.util.UnicodeUtil;
|
||||
import org.apache.lucene.util.CharsRefBuilder;
|
||||
|
||||
public class TermPosition {
|
||||
|
||||
|
@ -30,11 +29,11 @@ public class TermPosition {
|
|||
public int startOffset = -1;
|
||||
public int endOffset = -1;
|
||||
public BytesRef payload;
|
||||
private CharsRef spare = new CharsRef(0);
|
||||
private CharsRefBuilder spare = new CharsRefBuilder();
|
||||
|
||||
public String payloadAsString() {
|
||||
if (payload != null && payload.length != 0) {
|
||||
UnicodeUtil.UTF8toUTF16(payload.bytes, payload.offset, payload.length, spare);
|
||||
spare.copyUTF8Bytes(payload);
|
||||
return spare.toString();
|
||||
} else {
|
||||
return null;
|
||||
|
|
|
@ -25,6 +25,7 @@ import org.apache.lucene.search.Filter;
|
|||
import org.apache.lucene.search.Scorer;
|
||||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BytesRefBuilder;
|
||||
import org.elasticsearch.ElasticsearchIllegalArgumentException;
|
||||
import org.elasticsearch.common.xcontent.XContentParser;
|
||||
import org.elasticsearch.index.cache.fixedbitset.FixedBitSetFilter;
|
||||
|
@ -156,12 +157,12 @@ public class ScriptSortParser implements SortParser {
|
|||
protected SortedBinaryDocValues getValues(AtomicReaderContext context) {
|
||||
searchScript.setNextReader(context);
|
||||
final BinaryDocValues values = new BinaryDocValues() {
|
||||
final BytesRef spare = new BytesRef();
|
||||
final BytesRefBuilder spare = new BytesRefBuilder();
|
||||
@Override
|
||||
public BytesRef get(int docID) {
|
||||
searchScript.setNextDocId(docID);
|
||||
spare.copyChars(searchScript.run().toString());
|
||||
return spare;
|
||||
return spare.get();
|
||||
}
|
||||
};
|
||||
return FieldData.singleton(values, null);
|
||||
|
|
|
@@ -20,7 +20,7 @@ package org.elasticsearch.search.suggest;

import com.google.common.collect.ImmutableMap;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.CharsRefBuilder;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.common.component.AbstractComponent;
import org.elasticsearch.common.inject.Inject;

@@ -76,7 +76,7 @@ public class SuggestPhase extends AbstractComponent implements SearchPhase {

public Suggest execute(SuggestionSearchContext suggest, IndexReader reader) {
try {
CharsRef spare = new CharsRef(); // Maybe add CharsRef to CacheRecycler?
CharsRefBuilder spare = new CharsRefBuilder();
final List<Suggestion<? extends Entry<? extends Option>>> suggestions = new ArrayList<>(suggest.suggestions().size());

for (Map.Entry<String, SuggestionSearchContext.SuggestionContext> entry : suggest.suggestions().entrySet()) {

@@ -25,8 +25,9 @@ import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.search.spell.*;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util.CharsRefBuilder;
import org.apache.lucene.util.automaton.LevenshteinAutomata;
import org.elasticsearch.ElasticsearchIllegalArgumentException;
import org.elasticsearch.common.ParseField;
@@ -78,32 +79,14 @@ public final class SuggestUtils {
return directSpellChecker;
}

public static BytesRef join(BytesRef separator, BytesRef result, BytesRef... toJoin) {
int len = separator.length * toJoin.length - 1;
for (BytesRef br : toJoin) {
len += br.length;
}

result.grow(len);
return joinPreAllocated(separator, result, toJoin);
}

public static BytesRef joinPreAllocated(BytesRef separator, BytesRef result, BytesRef... toJoin) {
result.length = 0;
result.offset = 0;
public static BytesRef join(BytesRef separator, BytesRefBuilder result, BytesRef... toJoin) {
result.clear();
for (int i = 0; i < toJoin.length - 1; i++) {
BytesRef br = toJoin[i];
System.arraycopy(br.bytes, br.offset, result.bytes, result.offset, br.length);
result.offset += br.length;
System.arraycopy(separator.bytes, separator.offset, result.bytes, result.offset, separator.length);
result.offset += separator.length;
result.append(toJoin[i]);
result.append(separator);
}
final BytesRef br = toJoin[toJoin.length-1];
System.arraycopy(br.bytes, br.offset, result.bytes, result.offset, br.length);

result.length = result.offset + br.length;
result.offset = 0;
return result;
result.append(toJoin[toJoin.length-1]);
return result.get();
}

public static abstract class TokenConsumer {
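
The rewritten SuggestUtils.join collapses the manual grow/offset/arraycopy bookkeeping into BytesRefBuilder.append calls. A standalone sketch of the same logic, runnable on its own (the sample terms are illustrative):

    import org.apache.lucene.util.BytesRef;
    import org.apache.lucene.util.BytesRefBuilder;

    public class JoinSketch {
        static BytesRef join(BytesRef separator, BytesRefBuilder result, BytesRef... toJoin) {
            result.clear();                        // reset the reusable buffer
            for (int i = 0; i < toJoin.length - 1; i++) {
                result.append(toJoin[i]);          // append term bytes
                result.append(separator);          // append separator bytes
            }
            result.append(toJoin[toJoin.length - 1]);
            return result.get();                   // BytesRef view over the builder
        }

        public static void main(String[] args) {
            BytesRef joined = join(new BytesRef(" "), new BytesRefBuilder(),
                    new BytesRef("foo"), new BytesRef("bar"), new BytesRef("baz"));
            System.out.println(joined.utf8ToString()); // foo bar baz
        }
    }
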
@@ -117,12 +100,9 @@ public final class SuggestUtils {
offsetAttr = stream.addAttribute(OffsetAttribute.class);
}

protected BytesRef fillBytesRef(BytesRef spare) {
spare.offset = 0;
spare.length = spare.bytes.length;
char[] source = charTermAttr.buffer();
UnicodeUtil.UTF16toUTF8(source, 0, charTermAttr.length(), spare);
return spare;
protected BytesRef fillBytesRef(BytesRefBuilder spare) {
spare.copyChars(charTermAttr);
return spare.get();
}

public abstract void nextToken() throws IOException;

@@ -130,9 +110,9 @@ public final class SuggestUtils {
public void end() {}
}

public static int analyze(Analyzer analyzer, BytesRef toAnalyze, String field, TokenConsumer consumer, CharsRef spare) throws IOException {
UnicodeUtil.UTF8toUTF16(toAnalyze, spare);
return analyze(analyzer, spare, field, consumer);
public static int analyze(Analyzer analyzer, BytesRef toAnalyze, String field, TokenConsumer consumer, CharsRefBuilder spare) throws IOException {
spare.copyUTF8Bytes(toAnalyze);
return analyze(analyzer, spare.get(), field, consumer);
}

public static int analyze(Analyzer analyzer, CharsRef toAnalyze, String field, TokenConsumer consumer) throws IOException {
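
The two hunks above combine into one common flow: a UTF-8 term is turned back into chars with CharsRefBuilder.copyUTF8Bytes for analysis, and each emitted token is re-encoded with BytesRefBuilder.copyChars(termAtt). A hedged, standalone sketch of that flow; the field name, analyzer choice and sample text are assumptions, not taken from the commit:

    import java.io.IOException;
    import java.util.ArrayList;
    import java.util.List;
    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
    import org.apache.lucene.util.BytesRef;
    import org.apache.lucene.util.BytesRefBuilder;
    import org.apache.lucene.util.CharsRefBuilder;
    import org.apache.lucene.util.Version;

    public class AnalyzeSketch {
        public static void main(String[] args) throws IOException {
            BytesRef toAnalyze = new BytesRef("Hello Suggest World");
            CharsRefBuilder spare = new CharsRefBuilder();
            spare.copyUTF8Bytes(toAnalyze);                 // UTF-8 bytes -> chars

            List<BytesRef> terms = new ArrayList<>();
            BytesRefBuilder bytesSpare = new BytesRefBuilder();
            try (Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_4_10_0);
                 TokenStream stream = analyzer.tokenStream("field", spare.get().toString())) {
                CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
                stream.reset();
                while (stream.incrementToken()) {
                    bytesSpare.copyChars(termAtt);          // chars -> UTF-8 bytes
                    terms.add(bytesSpare.toBytesRef());     // deep copy per token
                }
                stream.end();
            }
            for (BytesRef term : terms) {
                System.out.println(term.utf8ToString());
            }
        }
    }
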
@@ -20,21 +20,21 @@
package org.elasticsearch.search.suggest;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.CharsRefBuilder;

import java.io.IOException;

public abstract class Suggester<T extends SuggestionSearchContext.SuggestionContext> {

protected abstract Suggest.Suggestion<? extends Suggest.Suggestion.Entry<? extends Suggest.Suggestion.Entry.Option>>
innerExecute(String name, T suggestion, IndexReader indexReader, CharsRef spare) throws IOException;
innerExecute(String name, T suggestion, IndexReader indexReader, CharsRefBuilder spare) throws IOException;

public abstract String[] names();

public abstract SuggestContextParser getContextParser();

public Suggest.Suggestion<? extends Suggest.Suggestion.Entry<? extends Suggest.Suggestion.Entry.Option>>
execute(String name, T suggestion, IndexReader indexReader, CharsRef spare) throws IOException {
execute(String name, T suggestion, IndexReader indexReader, CharsRefBuilder spare) throws IOException {
// #3469 We want to ignore empty shards
if (indexReader.numDocs() == 0) {
return null;
@@ -187,7 +187,7 @@ public class AnalyzingCompletionLookupProvider extends CompletionLookupProvider
@Override
public void addPosition(int position, BytesRef payload, int startOffset, int endOffset) throws IOException {
analyzingSuggestLookupProvider.parsePayload(payload, spare);
builder.addSurface(spare.surfaceForm, spare.payload, spare.weight);
builder.addSurface(spare.surfaceForm.get(), spare.payload.get(), spare.weight);
// multi fields have the same surface form so we sum up here
maxAnalyzedPathsForOneInput = Math.max(maxAnalyzedPathsForOneInput, position + 1);
}

@@ -25,10 +25,7 @@ import org.apache.lucene.index.*;
import org.apache.lucene.index.FilterAtomicReader.FilterTerms;
import org.apache.lucene.search.suggest.Lookup;
import org.apache.lucene.store.IOContext.Context;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.InputStreamDataInput;
import org.apache.lucene.store.OutputStreamDataOutput;
import org.apache.lucene.store.*;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.elasticsearch.ElasticsearchIllegalStateException;

@@ -342,12 +339,12 @@ public class Completion090PostingsFormat extends PostingsFormat {
ref.weight = input.readVLong() - 1;
int len = input.readVInt();
ref.surfaceForm.grow(len);
ref.surfaceForm.length = len;
input.readBytes(ref.surfaceForm.bytes, ref.surfaceForm.offset, ref.surfaceForm.length);
ref.surfaceForm.setLength(len);
input.readBytes(ref.surfaceForm.bytes(), 0, ref.surfaceForm.length());
len = input.readVInt();
ref.payload.grow(len);
ref.payload.length = len;
input.readBytes(ref.payload.bytes, ref.payload.offset, ref.payload.length);
ref.payload.setLength(len);
input.readBytes(ref.payload.bytes(), 0, ref.payload.length());
input.close();
}
}
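
The Completion090PostingsFormat hunk reads a length-prefixed blob into a BytesRefBuilder: grow, setLength, then readBytes straight into bytes(). A standalone sketch of that read pattern; the ByteArrayDataOutput/Input pair simply stands in for the real IndexOutput/IndexInput and the sample value is made up:

    import java.io.IOException;
    import org.apache.lucene.store.ByteArrayDataInput;
    import org.apache.lucene.store.ByteArrayDataOutput;
    import org.apache.lucene.util.BytesRef;
    import org.apache.lucene.util.BytesRefBuilder;

    public class LengthPrefixedReadSketch {
        public static void main(String[] args) throws IOException {
            // write a length-prefixed blob
            byte[] buffer = new byte[64];
            ByteArrayDataOutput out = new ByteArrayDataOutput(buffer);
            BytesRef surface = new BytesRef("nirvana");
            out.writeVInt(surface.length);
            out.writeBytes(surface.bytes, surface.offset, surface.length);

            // read it back into a reusable builder
            ByteArrayDataInput in = new ByteArrayDataInput(buffer);
            BytesRefBuilder surfaceForm = new BytesRefBuilder();
            int len = in.readVInt();
            surfaceForm.grow(len);                          // make room in the backing array
            surfaceForm.setLength(len);                     // record the logical length
            in.readBytes(surfaceForm.bytes(), 0, surfaceForm.length()); // fill the backing array

            System.out.println(surfaceForm.get().utf8ToString()); // nirvana
        }
    }
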
@@ -24,9 +24,8 @@ import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Terms;
import org.apache.lucene.search.suggest.Lookup;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.CharsRefBuilder;
import org.apache.lucene.util.CollectionUtil;
import org.apache.lucene.util.UnicodeUtil;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.common.bytes.BytesArray;
import org.elasticsearch.common.text.StringText;

@@ -49,13 +48,13 @@ public class CompletionSuggester extends Suggester<CompletionSuggestionContext>

@Override
protected Suggest.Suggestion<? extends Suggest.Suggestion.Entry<? extends Suggest.Suggestion.Entry.Option>> innerExecute(String name,
CompletionSuggestionContext suggestionContext, IndexReader indexReader, CharsRef spare) throws IOException {
CompletionSuggestionContext suggestionContext, IndexReader indexReader, CharsRefBuilder spare) throws IOException {
if (suggestionContext.mapper() == null || !(suggestionContext.mapper() instanceof CompletionFieldMapper)) {
throw new ElasticsearchException("Field [" + suggestionContext.getField() + "] is not a completion suggest field");
}

CompletionSuggestion completionSuggestion = new CompletionSuggestion(name, suggestionContext.getSize());
UnicodeUtil.UTF8toUTF16(suggestionContext.getText(), spare);
spare.copyUTF8Bytes(suggestionContext.getText());

CompletionSuggestion.Entry completionSuggestEntry = new CompletionSuggestion.Entry(new StringText(spare.toString()), 0, spare.length());
completionSuggestion.addTerm(completionSuggestEntry);

@@ -73,7 +72,7 @@ public class CompletionSuggester extends Suggester<CompletionSuggestionContext>
// docs from the segment that had a value in this segment.
continue;
}
List<Lookup.LookupResult> lookupResults = lookup.lookup(spare, false, suggestionContext.getSize());
List<Lookup.LookupResult> lookupResults = lookup.lookup(spare.get(), false, suggestionContext.getSize());
for (Lookup.LookupResult res : lookupResults) {

final String key = res.key.toString();
@@ -76,7 +76,7 @@ public final class CompletionTokenStream extends TokenStream {
* produced. Multi Fields have the same surface form and therefore sum up
*/
posInc = 0;
Util.toBytesRef(finiteStrings.next(), bytesAtt.getBytesRef()); // now we have UTF-8
Util.toBytesRef(finiteStrings.next(), bytesAtt.builder()); // now we have UTF-8
if (charTermAttribute != null) {
charTermAttribute.setLength(0);
charTermAttribute.append(bytesAtt.toUTF16());

@@ -123,12 +123,17 @@ public final class CompletionTokenStream extends TokenStream {
public interface ByteTermAttribute extends TermToBytesRefAttribute {
// marker interface

/**
* Return the builder from which the term is derived.
*/
public BytesRefBuilder builder();

public CharSequence toUTF16();
}

public static final class ByteTermAttributeImpl extends AttributeImpl implements ByteTermAttribute, TermToBytesRefAttribute {
private final BytesRef bytes = new BytesRef();
private CharsRef charsRef;
private final BytesRefBuilder bytes = new BytesRefBuilder();
private CharsRefBuilder charsRef;

@Override
public void fillBytesRef() {

@@ -136,13 +141,18 @@ public final class CompletionTokenStream extends TokenStream {
}

@Override
public BytesRef getBytesRef() {
public BytesRefBuilder builder() {
return bytes;
}

@Override
public BytesRef getBytesRef() {
return bytes.get();
}

@Override
public void clear() {
bytes.length = 0;
bytes.clear();
}

@Override

@@ -154,10 +164,10 @@ public final class CompletionTokenStream extends TokenStream {
@Override
public CharSequence toUTF16() {
if (charsRef == null) {
charsRef = new CharsRef();
charsRef = new CharsRefBuilder();
}
UnicodeUtil.UTF8toUTF16(bytes, charsRef);
return charsRef;
charsRef.copyUTF8Bytes(getBytesRef());
return charsRef.get();
}
}
}
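
The ByteTermAttributeImpl hunks above keep the term as UTF-8 in a BytesRefBuilder and convert to UTF-16 on demand with a CharsRefBuilder. A small sketch of that buffer, with an illustrative class name and without the Lucene attribute plumbing:

    import org.apache.lucene.util.BytesRef;
    import org.apache.lucene.util.BytesRefBuilder;
    import org.apache.lucene.util.CharsRefBuilder;

    public class ByteTermBufferSketch {
        private final BytesRefBuilder bytes = new BytesRefBuilder();
        private CharsRefBuilder charsRef;

        public BytesRefBuilder builder() {
            return bytes;                         // callers fill the term via the builder
        }

        public BytesRef getBytesRef() {
            return bytes.get();                   // UTF-8 view of the current term
        }

        public CharSequence toUTF16() {
            if (charsRef == null) {
                charsRef = new CharsRefBuilder(); // lazily allocated, then reused
            }
            charsRef.copyUTF8Bytes(getBytesRef());
            return charsRef.get();
        }

        public static void main(String[] args) {
            ByteTermBufferSketch att = new ByteTermBufferSketch();
            att.builder().copyChars("suggestion");
            System.out.println(att.toUTF16()); // suggestion
        }
    }
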
@@ -20,6 +20,7 @@
package org.elasticsearch.search.suggest.completion;

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;

import java.io.IOException;

@@ -30,8 +31,8 @@ interface PayloadProcessor {
void parsePayload(BytesRef payload, SuggestPayload ref) throws IOException;

static class SuggestPayload {
final BytesRef payload = new BytesRef();
final BytesRefBuilder payload = new BytesRefBuilder();
long weight = 0;
final BytesRef surfaceForm = new BytesRef();
final BytesRefBuilder surfaceForm = new BytesRefBuilder();
}
}
@@ -25,8 +25,9 @@ import com.google.common.collect.Lists;
import org.apache.lucene.analysis.PrefixAnalyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.util.automaton.Automata;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.BasicAutomata;
import org.apache.lucene.util.automaton.Operations;
import org.elasticsearch.ElasticsearchParseException;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentParser;

@@ -278,9 +279,9 @@ public class CategoryContextMapping extends ContextMapping {
public Automaton toAutomaton() {
List<Automaton> automatons = new ArrayList<>();
for (CharSequence value : values) {
automatons.add(BasicAutomata.makeString(value.toString()));
automatons.add(Automata.makeString(value.toString()));
}
return Automaton.union(automatons);
return Operations.union(automatons);
}

@Override
@@ -24,9 +24,9 @@ import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.search.suggest.analyzing.XAnalyzingSuggester;
import org.apache.lucene.util.automaton.Automata;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.BasicAutomata;
import org.apache.lucene.util.automaton.BasicOperations;
import org.apache.lucene.util.automaton.Operations;
import org.apache.lucene.util.fst.FST;
import org.elasticsearch.ElasticsearchParseException;
import org.elasticsearch.common.xcontent.ToXContent;

@@ -246,21 +246,20 @@ public abstract class ContextMapping implements ToXContent {
* @return Automaton matching the given Query
*/
public static Automaton toAutomaton(boolean preserveSep, Iterable<ContextQuery> queries) {
Automaton a = BasicAutomata.makeEmptyString();
Automaton a = Automata.makeEmptyString();

Automaton gap = BasicAutomata.makeChar(ContextMapping.SEPARATOR);
Automaton gap = Automata.makeChar(ContextMapping.SEPARATOR);
if (preserveSep) {
// if separators are preserved the fst contains a SEP_LABEL
// behind each gap. To have a matching automaton, we need to
// include the SEP_LABEL in the query as well
gap = BasicOperations.concatenate(gap, BasicAutomata.makeChar(XAnalyzingSuggester.SEP_LABEL));
gap = Operations.concatenate(gap, Automata.makeChar(XAnalyzingSuggester.SEP_LABEL));
}

for (ContextQuery query : queries) {
a = Automaton.concatenate(Arrays.asList(query.toAutomaton(), gap, a));
a = Operations.concatenate(Arrays.asList(query.toAutomaton(), gap, a));
}
BasicOperations.determinize(a);
return a;
return Operations.determinize(a);
}

/**
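
The context-mapping hunks track the automaton API rename in Lucene 4.10: BasicAutomata becomes Automata, and the static helpers move to Operations (which now returns the determinized automaton instead of mutating in place). A standalone sketch of the renamed API; the strings being matched are made up, and CharacterRunAutomaton is used here only to check membership:

    import java.util.Arrays;
    import org.apache.lucene.util.automaton.Automata;
    import org.apache.lucene.util.automaton.Automaton;
    import org.apache.lucene.util.automaton.CharacterRunAutomaton;
    import org.apache.lucene.util.automaton.Operations;

    public class AutomatonSketch {
        public static void main(String[] args) {
            Automaton foo = Automata.makeString("foo");
            Automaton bar = Automata.makeString("bar");
            Automaton sep = Automata.makeChar('/');

            // accepts "foo/bar" or "bar"
            Automaton combined = Operations.union(
                    Operations.concatenate(Arrays.asList(foo, sep, bar)), bar);
            Automaton det = Operations.determinize(combined);

            CharacterRunAutomaton run = new CharacterRunAutomaton(det);
            System.out.println(run.run("foo/bar")); // true
            System.out.println(run.run("foo"));     // false
        }
    }
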
@@ -24,9 +24,9 @@ import com.google.common.collect.Lists;
import org.apache.lucene.analysis.PrefixAnalyzer.PrefixTokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.util.automaton.Automata;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.BasicAutomata;
import org.apache.lucene.util.automaton.BasicOperations;
import org.apache.lucene.util.automaton.Operations;
import org.apache.lucene.util.fst.FST;
import org.elasticsearch.ElasticsearchParseException;
import org.elasticsearch.common.geo.GeoHashUtils;

@@ -689,12 +689,12 @@ public class GeolocationContextMapping extends ContextMapping {
public Automaton toAutomaton() {
Automaton automaton;
if(precisions == null || precisions.length == 0) {
automaton = BasicAutomata.makeString(location);
automaton = Automata.makeString(location);
} else {
automaton = BasicAutomata.makeString(location.substring(0, Math.max(1, Math.min(location.length(), precisions[0]))));
automaton = Automata.makeString(location.substring(0, Math.max(1, Math.min(location.length(), precisions[0]))));
for (int i = 1; i < precisions.length; i++) {
final String cell = location.substring(0, Math.max(1, Math.min(location.length(), precisions[i])));
automaton = BasicOperations.union(automaton, BasicAutomata.makeString(cell));
automaton = Operations.union(automaton, Automata.makeString(cell));
}
}
return automaton;
@@ -19,6 +19,7 @@
package org.elasticsearch.search.suggest.phrase;

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.elasticsearch.search.suggest.SuggestUtils;
import org.elasticsearch.search.suggest.phrase.DirectCandidateGenerator.Candidate;

@@ -46,10 +47,10 @@ public final class Correction implements Comparable<Correction> {
}

public BytesRef join(BytesRef separator, BytesRef preTag, BytesRef postTag) {
return join(separator, new BytesRef(), preTag, postTag);
return join(separator, new BytesRefBuilder(), preTag, postTag);
}

public BytesRef join(BytesRef separator, BytesRef result, BytesRef preTag, BytesRef postTag) {
public BytesRef join(BytesRef separator, BytesRefBuilder result, BytesRef preTag, BytesRef postTag) {
BytesRef[] toJoin = new BytesRef[this.candidates.length];
int len = separator.length * this.candidates.length - 1;
for (int i = 0; i < toJoin.length; i++) {

@@ -58,7 +59,8 @@ public final class Correction implements Comparable<Correction> {
toJoin[i] = candidate.term;
} else {
final int maxLen = preTag.length + postTag.length + candidate.term.length;
final BytesRef highlighted = new BytesRef(maxLen);// just allocate once
final BytesRefBuilder highlighted = new BytesRefBuilder();// just allocate once
highlighted.grow(maxLen);
if (i == 0 || candidates[i-1].userInput) {
highlighted.append(preTag);
}

@@ -66,13 +68,12 @@ public final class Correction implements Comparable<Correction> {
if (toJoin.length == i + 1 || candidates[i+1].userInput) {
highlighted.append(postTag);
}
toJoin[i] = highlighted;
toJoin[i] = highlighted.get();
}
len += toJoin[i].length;
}
result.offset = 0;
result.grow(len);
return SuggestUtils.joinPreAllocated(separator, result, toJoin);
return SuggestUtils.join(separator, result, toJoin);
}

/** Lower scores sorts first; if scores are equal,
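
The Correction hunks wrap each candidate term in pre/post tags inside a BytesRefBuilder before joining. A small standalone sketch of that wrap step; the tag values and method name are illustrative:

    import org.apache.lucene.util.BytesRef;
    import org.apache.lucene.util.BytesRefBuilder;

    public class HighlightWrapSketch {
        static BytesRef wrap(BytesRef term, BytesRef preTag, BytesRef postTag) {
            BytesRefBuilder highlighted = new BytesRefBuilder();
            highlighted.grow(preTag.length + postTag.length + term.length); // just allocate once
            highlighted.append(preTag);
            highlighted.append(term);
            highlighted.append(postTag);
            return highlighted.get();
        }

        public static void main(String[] args) {
            BytesRef wrapped = wrap(new BytesRef("lucene"), new BytesRef("<em>"), new BytesRef("</em>"));
            System.out.println(wrapped.utf8ToString()); // <em>lucene</em>
        }
    }
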
@@ -24,17 +24,13 @@ import org.apache.lucene.search.spell.DirectSpellChecker;
import org.apache.lucene.search.spell.SuggestMode;
import org.apache.lucene.search.spell.SuggestWord;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.CharsRefBuilder;
import org.elasticsearch.ElasticsearchIllegalArgumentException;
import org.elasticsearch.search.suggest.SuggestUtils;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.*;

//TODO public for tests
public final class DirectCandidateGenerator extends CandidateGenerator {

@@ -51,8 +47,8 @@ public final class DirectCandidateGenerator extends CandidateGenerator {
private final Analyzer postFilter;
private final double nonErrorLikelihood;
private final boolean useTotalTermFrequency;
private final CharsRef spare = new CharsRef();
private final BytesRef byteSpare = new BytesRef();
private final CharsRefBuilder spare = new CharsRefBuilder();
private final BytesRefBuilder byteSpare = new BytesRefBuilder();
private final int numCandidates;

public DirectCandidateGenerator(DirectSpellChecker spellchecker, String field, SuggestMode suggestMode, IndexReader reader, double nonErrorLikelihood, int numCandidates) throws IOException {

@@ -129,11 +125,11 @@ public final class DirectCandidateGenerator extends CandidateGenerator {
return set;
}

protected BytesRef preFilter(final BytesRef term, final CharsRef spare, final BytesRef byteSpare) throws IOException {
protected BytesRef preFilter(final BytesRef term, final CharsRefBuilder spare, final BytesRefBuilder byteSpare) throws IOException {
if (preFilter == null) {
return term;
}
final BytesRef result = byteSpare;
final BytesRefBuilder result = byteSpare;
SuggestUtils.analyze(preFilter, term, field, new SuggestUtils.TokenConsumer() {

@Override

@@ -141,25 +137,25 @@ public final class DirectCandidateGenerator extends CandidateGenerator {
this.fillBytesRef(result);
}
}, spare);
return result;
return result.get();
}

protected void postFilter(final Candidate candidate, final CharsRef spare, BytesRef byteSpare, final List<Candidate> candidates) throws IOException {
protected void postFilter(final Candidate candidate, final CharsRefBuilder spare, BytesRefBuilder byteSpare, final List<Candidate> candidates) throws IOException {
if (postFilter == null) {
candidates.add(candidate);
} else {
final BytesRef result = byteSpare;
final BytesRefBuilder result = byteSpare;
SuggestUtils.analyze(postFilter, candidate.term, field, new SuggestUtils.TokenConsumer() {
@Override
public void nextToken() throws IOException {
this.fillBytesRef(result);

if (posIncAttr.getPositionIncrement() > 0 && result.bytesEquals(candidate.term)) {
BytesRef term = BytesRef.deepCopyOf(result);
if (posIncAttr.getPositionIncrement() > 0 && result.get().bytesEquals(candidate.term)) {
BytesRef term = result.toBytesRef();
long freq = frequency(term);
candidates.add(new Candidate(BytesRef.deepCopyOf(term), freq, candidate.stringDistance, score(candidate.frequency, candidate.stringDistance, dictSize), false));
candidates.add(new Candidate(result.toBytesRef(), freq, candidate.stringDistance, score(candidate.frequency, candidate.stringDistance, dictSize), false));
} else {
candidates.add(new Candidate(BytesRef.deepCopyOf(result), candidate.frequency, nonErrorLikelihood, score(candidate.frequency, candidate.stringDistance, dictSize), false));
candidates.add(new Candidate(result.toBytesRef(), candidate.frequency, nonErrorLikelihood, score(candidate.frequency, candidate.stringDistance, dictSize), false));
}
}
}, spare);
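
The DirectCandidateGenerator hunk leans on the difference between BytesRefBuilder.get() and toBytesRef(): get() is a live view over the builder's buffer, while toBytesRef() is an independent copy that survives later reuse (which is why it replaces BytesRef.deepCopyOf here). A tiny sketch of that distinction with made-up values:

    import org.apache.lucene.util.BytesRef;
    import org.apache.lucene.util.BytesRefBuilder;

    public class BuilderViewVsCopySketch {
        public static void main(String[] args) {
            BytesRefBuilder builder = new BytesRefBuilder();
            builder.copyChars("first");

            BytesRef copy = builder.toBytesRef(); // independent copy, safe to keep
            BytesRef view = builder.get();        // shares the builder's buffer, only valid until reuse

            builder.copyChars("later");           // the builder is reused for the next token
            System.out.println(copy.utf8ToString());          // first
            System.out.println(builder.get().utf8ToString()); // later
            // 'view' above is shown only for contrast: it must not be stored across reuse
        }
    }
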
@@ -46,15 +46,15 @@ public final class LaplaceScorer extends WordScorer {
@Override
protected double scoreBigram(Candidate word, Candidate w_1) throws IOException {
SuggestUtils.join(separator, spare, w_1.term, word.term);
return (alpha + frequency(spare)) / (alpha + w_1.frequency + vocabluarySize);
return (alpha + frequency(spare.get())) / (alpha + w_1.frequency + vocabluarySize);
}

@Override
protected double scoreTrigram(Candidate word, Candidate w_1, Candidate w_2) throws IOException {
SuggestUtils.join(separator, spare, w_2.term, w_1.term, word.term);
long trigramCount = frequency(spare);
long trigramCount = frequency(spare.get());
SuggestUtils.join(separator, spare, w_1.term, word.term);
return (alpha + trigramCount) / (alpha + frequency(spare) + vocabluarySize);
return (alpha + trigramCount) / (alpha + frequency(spare.get()) + vocabluarySize);
}