Reuse the non-analyzed token stream for string types

so that a heavyweight token stream is not created each time.
This commit is contained in:
Shay Banon 2012-12-12 22:53:48 -08:00
parent fc35fd8a29
commit c65d5a77c4
1 changed files with 87 additions and 1 deletions

View File

@ -20,6 +20,9 @@
package org.elasticsearch.index.mapper.core;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.FieldInfo.IndexOptions;
@ -294,7 +297,7 @@ public class StringFieldMapper extends AbstractFieldMapper<String> implements Al
context.ignoredValue(names.indexName(), value);
return null;
}
Field field = new Field(names.indexName(), value, fieldType);
Field field = new StringField(names.indexName(), value, fieldType);
field.setBoost(boost);
return field;
}
@ -364,4 +367,87 @@ public class StringFieldMapper extends AbstractFieldMapper<String> implements Al
builder.field("ignore_above", ignoreAbove);
}
}
/**
 * A {@link Field} specialization for string values. For values that are indexed but
 * not tokenized it hands out the cached {@link StringTokenStream} held in
 * {@code NOT_ANALYZED_TOKENSTREAM} instead of building a new stream per value.
 */
static class StringField extends Field {

    public StringField(String name, String value, FieldType fieldType) {
        super(name, value, fieldType);
    }

    /**
     * Chooses the token stream for this field.
     *
     * @param analyzer the analyzer, only forwarded to {@link Field} for tokenized values
     * @return {@code null} when the field is not indexed; the regular {@link Field}
     *         stream when the field is tokenized; otherwise the cached stream,
     *         loaded with this field's value
     */
    @Override
    public TokenStream tokenStream(Analyzer analyzer) throws IOException {
        if (fieldType().indexed()) {
            if (fieldType().tokenized()) {
                // Tokenized values keep going through the stock Field implementation.
                return super.tokenStream(analyzer);
            }
            // Indexed and not tokenized: reuse the cached single-token stream.
            final StringTokenStream cached = NOT_ANALYZED_TOKENSTREAM.get();
            return cached.setValue((String) fieldsData);
        }
        return null;
    }
}
/**
 * Thread-local holder of the {@link StringTokenStream} returned by
 * {@link StringField#tokenStream(Analyzer)} for indexed, not-tokenized values.
 * Each thread gets its own instance, which is re-targeted through
 * {@link StringTokenStream#setValue(String)} on every use.
 */
private static final ThreadLocal<StringTokenStream> NOT_ANALYZED_TOKENSTREAM = new ThreadLocal<StringTokenStream>() {
@Override
protected StringTokenStream initialValue() {
// The new stream has no value yet; the caller supplies it via setValue(String).
return new StringTokenStream();
}
};
// Copied from Field.java
/**
 * A {@link TokenStream} that emits one token: the whole string value, with
 * offsets spanning from 0 to the value's length. The instance can be pointed
 * at a new value with {@link #setValue(String)} and re-armed with {@link #reset()}.
 */
static final class StringTokenStream extends TokenStream {

    private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class);
    private final OffsetAttribute offsetAttribute = addAttribute(OffsetAttribute.class);

    private String value = null;
    private boolean used = false;

    /**
     * Builds a stream that has no value yet.
     * <p>Warning: the value is not initialized here; {@link #setValue(String)}
     * must be called before the stream is consumed.
     */
    StringTokenStream() {
    }

    /**
     * Assigns the string that will be emitted as the single token.
     *
     * @param value the string to emit
     * @return this stream, so the call can be chained
     */
    StringTokenStream setValue(String value) {
        this.value = value;
        return this;
    }

    /**
     * Emits the value as a token on the first call after {@link #reset()}.
     *
     * @return {@code true} for the single token, {@code false} once it has been emitted
     */
    @Override
    public boolean incrementToken() {
        if (!used) {
            clearAttributes();
            termAttribute.append(value);
            offsetAttribute.setOffset(0, value.length());
            used = true;
            return true;
        }
        return false;
    }

    /**
     * Sets both offsets to the end of the value, then drops the reference to it.
     */
    @Override
    public void end() {
        final int length = value.length();
        offsetAttribute.setOffset(length, length);
        value = null;
    }

    /**
     * Re-arms the stream so the next {@link #incrementToken()} emits a token again.
     */
    @Override
    public void reset() {
        used = false;
    }

    /**
     * Drops the reference to the current value.
     */
    @Override
    public void close() {
        value = null;
    }
}
}