mirror of https://github.com/apache/lucene.git
LUCENE-4240: don't invoke the Analyzer for not-analyzed fields, fix offsetGap to just take fieldName
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1363821 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
cc90a37ed7
commit
6a4cdbeb05
|
@ -50,6 +50,11 @@ API Changes
|
|||
filter another reader and you override correct() for offset correction.
|
||||
(Robert Muir)
|
||||
|
||||
* LUCENE-4240: Analyzer API now just takes fieldName for getOffsetGap. If the
|
||||
field is not analyzed (e.g. StringField), then the analyzer is not invoked
|
||||
at all. If you want to tweak things like positionIncrementGap and offsetGap,
|
||||
analyze the field with KeywordTokenizer instead. (Grant Ingersoll, Robert Muir)
|
||||
|
||||
Optimizations
|
||||
|
||||
* LUCENE-4171: Performance improvements to Packed64.
|
||||
|
|
|
@ -17,7 +17,6 @@ package org.apache.lucene.analysis;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.index.IndexableField;
|
||||
import org.apache.lucene.store.AlreadyClosedException;
|
||||
import org.apache.lucene.util.CloseableThreadLocal;
|
||||
|
||||
|
@ -114,21 +113,15 @@ public abstract class Analyzer {
|
|||
|
||||
/**
|
||||
* Just like {@link #getPositionIncrementGap}, except for
|
||||
* Token offsets instead. By default this returns 1 for
|
||||
* tokenized fields and, as if the fields were joined
|
||||
* with an extra space character, and 0 for un-tokenized
|
||||
* fields. This method is only called if the field
|
||||
* Token offsets instead. By default this returns 1.
|
||||
* This method is only called if the field
|
||||
* produced at least one token for indexing.
|
||||
*
|
||||
* @param field the field just indexed
|
||||
* @param fieldName the field just indexed
|
||||
* @return offset gap, added to the next token emitted from {@link #tokenStream(String,Reader)}
|
||||
*/
|
||||
public int getOffsetGap(IndexableField field) {
|
||||
if (field.fieldType().tokenized()) {
|
||||
return 1;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
public int getOffsetGap(String fieldName) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
/** Frees persistent resources used by this Analyzer */
|
||||
|
|
|
@ -17,8 +17,6 @@ package org.apache.lucene.analysis;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.index.IndexableField;
|
||||
|
||||
import java.io.Reader;
|
||||
|
||||
/**
|
||||
|
@ -83,8 +81,8 @@ public abstract class AnalyzerWrapper extends Analyzer {
|
|||
* {@inheritDoc}
|
||||
*/
|
||||
@Override
|
||||
public final int getOffsetGap(IndexableField field) {
|
||||
return getWrappedAnalyzer(field.name()).getOffsetGap(field);
|
||||
public final int getOffsetGap(String fieldName) {
|
||||
return getWrappedAnalyzer(fieldName).getOffsetGap(fieldName);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -76,6 +76,7 @@ final class DocInverterPerField extends DocFieldConsumerPerField {
|
|||
// consumer if it wants to see this particular field
|
||||
// tokenized.
|
||||
if (fieldType.indexed() && doInvert) {
|
||||
final boolean analyzed = fieldType.tokenized() && docState.analyzer != null;
|
||||
|
||||
// if the field omits norms, the boost cannot be indexed.
|
||||
if (fieldType.omitNorms() && field.boost() != 1.0f) {
|
||||
|
@ -88,7 +89,7 @@ final class DocInverterPerField extends DocFieldConsumerPerField {
|
|||
int lastStartOffset = 0;
|
||||
|
||||
if (i > 0) {
|
||||
fieldState.position += docState.analyzer == null ? 0 : docState.analyzer.getPositionIncrementGap(fieldInfo.name);
|
||||
fieldState.position += analyzed ? docState.analyzer.getPositionIncrementGap(fieldInfo.name) : 0;
|
||||
}
|
||||
|
||||
final TokenStream stream = field.tokenStream(docState.analyzer);
|
||||
|
@ -188,7 +189,7 @@ final class DocInverterPerField extends DocFieldConsumerPerField {
|
|||
}
|
||||
}
|
||||
|
||||
fieldState.offset += docState.analyzer == null ? 0 : docState.analyzer.getOffsetGap(field);
|
||||
fieldState.offset += analyzed ? docState.analyzer.getOffsetGap(fieldInfo.name) : 0;
|
||||
fieldState.boost *= field.boost();
|
||||
}
|
||||
|
||||
|
|
|
@ -35,6 +35,7 @@ import org.apache.lucene.document.Document;
|
|||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.FieldType;
|
||||
import org.apache.lucene.document.StoredField;
|
||||
import org.apache.lucene.document.StringField;
|
||||
import org.apache.lucene.document.TextField;
|
||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
|
||||
|
@ -1799,4 +1800,40 @@ public class TestIndexWriter extends LuceneTestCase {
|
|||
r.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
public void testDontInvokeAnalyzerForUnAnalyzedFields() throws Exception {
|
||||
Analyzer analyzer = new Analyzer() {
|
||||
@Override
|
||||
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||
throw new IllegalStateException("don't invoke me!");
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getPositionIncrementGap(String fieldName) {
|
||||
throw new IllegalStateException("don't invoke me!");
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getOffsetGap(String fieldName) {
|
||||
throw new IllegalStateException("don't invoke me!");
|
||||
}
|
||||
};
|
||||
Directory dir = newDirectory();
|
||||
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(
|
||||
TEST_VERSION_CURRENT, analyzer));
|
||||
Document doc = new Document();
|
||||
FieldType customType = new FieldType(StringField.TYPE_NOT_STORED);
|
||||
customType.setStoreTermVectors(true);
|
||||
customType.setStoreTermVectorPositions(true);
|
||||
customType.setStoreTermVectorOffsets(true);
|
||||
Field f = newField("field", "abcd", customType);
|
||||
doc.add(f);
|
||||
doc.add(f);
|
||||
Field f2 = newField("field", "", customType);
|
||||
doc.add(f2);
|
||||
doc.add(f);
|
||||
w.addDocument(doc);
|
||||
w.close();
|
||||
dir.close();
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue