Allow reusing indexed binary fields. (#12053)

Today Lucene allows creating indexed binary fields, e.g. via
`StringField(String, BytesRef, Field.Store)`, but not reusing them: calling
`setBytesValue` on a `StringField` throws.

This commit removes the check that prevents reusing fields with binary values.
I considered an alternative that would fail when calling `setBytesValue` on a
field that is indexed and tokenized, but we currently don't have such checks on
numeric values, for instance, so it did not feel consistent.

This change should help improve the [nightly benchmarks for the NYC taxis
dataset](http://people.apache.org/~mikemccand/lucenebench/sparseResults.html)
by doing the String -> UTF-8 conversion only once per keyword, instead of once
for the `StringField` and once for the `SortedDocValuesField`, while still
reusing fields.
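
For illustration, here is a minimal sketch of the reuse pattern this enables. The field name `keyword`, the helper method, and the surrounding writer setup are hypothetical and not part of this commit; only the `setBytesValue` call on an indexed `StringField` relies on the new behavior:

```java
import java.io.IOException;
import java.util.List;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.util.BytesRef;

class ReusableKeywordFieldExample {

  // Index one keyword value per document, reusing the same Document and Field
  // instances and converting each String to UTF-8 only once.
  static void indexKeywords(IndexWriter writer, List<String> values) throws IOException {
    Document doc = new Document();
    StringField keyword = new StringField("keyword", new BytesRef(), Field.Store.NO);
    SortedDocValuesField keywordDv = new SortedDocValuesField("keyword", new BytesRef());
    doc.add(keyword);
    doc.add(keywordDv);

    for (String value : values) {
      BytesRef bytes = new BytesRef(value); // single String -> UTF-8 conversion
      keyword.setBytesValue(bytes); // previously threw on an indexed field, now allowed
      keywordDv.setBytesValue(bytes);
      writer.addDocument(doc);
    }
  }
}
```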
Adrien Grand 2023-01-12 09:32:13 +01:00
parent 525a11091c
commit 9ab324d2be
11 changed files with 154 additions and 82 deletions

View File

@@ -38,6 +38,15 @@ for (ScoreDoc hit : hits.scoreDocs) {
 Note that these StoredFields and TermVectors instances should only be consumed in the thread where
 they were acquired. For instance, it is illegal to share them across threads.
 
+### Field can no longer configure a TokenStream independently from a value
+
+Lucene 9.x and earlier versions allowed to set a TokenStream on Field instances
+independently from a string, binary or numeric value. This is no longer allowed
+on the base Field class. If you need to replicate this behavior, you need to
+either provide two fields, one with a TokenStream and another one with a value,
+or create a sub-class of Field that overrides `TokenStream
+tokenStream(Analyzer, TokenStream)` to return a custom TokenStream.
+
 ### PersianStemFilter is added to PersianAnalyzer (LUCENE-10312)
 
 PersianAnalyzer now includes PersianStemFilter, that would change analysis results. If you need the exactly same analysis
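
As an illustration of the sub-classing option in the migration note above, a minimal sketch might look as follows; the class name `PreAnalyzedField` is hypothetical, and the `FieldType` passed in is assumed to be indexed and tokenized:

```java
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;

// A field that carries a (possibly stored) string value but indexes a
// caller-supplied TokenStream, replicating the old setTokenStream(...) behavior.
class PreAnalyzedField extends Field {
  private final TokenStream tokens;

  PreAnalyzedField(String name, String storedValue, TokenStream tokens, FieldType type) {
    super(name, storedValue, type);
    this.tokens = tokens;
  }

  @Override
  public TokenStream tokenStream(Analyzer analyzer, TokenStream reuse) {
    return tokens; // ignore the analyzer and index the pre-analyzed tokens
  }
}
```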

View File

@@ -38,10 +38,12 @@ public class TestFixBrokenOffsetsFilter extends BaseTokenStreamTestCase {
     FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
     ft.setStoreTermVectors(true);
     ft.setStoreTermVectorOffsets(true);
-    Field field = new Field("foo", "", ft);
-    field.setTokenStream(
-        new FixBrokenOffsetsFilter(
-            new CannedTokenStream(new Token("bar", 5, 10), new Token("bar", 1, 4))));
+    Field field =
+        new Field(
+            "foo",
+            new FixBrokenOffsetsFilter(
+                new CannedTokenStream(new Token("bar", 5, 10), new Token("bar", 1, 4))),
+            ft);
     doc.add(field);
     iw.addDocument(doc);
     iw.close();

View File

@@ -23,6 +23,7 @@ import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.BytesTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.index.DocValuesType;
 import org.apache.lucene.index.IndexOptions;
 import org.apache.lucene.index.IndexableField;
 import org.apache.lucene.index.IndexableFieldType;
@@ -69,13 +70,8 @@ public class Field implements IndexableField {
   protected Object fieldsData;
 
-  /**
-   * Pre-analyzed tokenStream for indexed fields; this is separate from fieldsData because you are
-   * allowed to have both; eg maybe field has a String value but you customize how it's tokenized
-   */
-  protected TokenStream tokenStream;
-
   /**
-   * Expert: creates a field with no initial value. Intended only for custom Field subclasses.
+   * Expert: creates a field with no initial value. This is intended to be used by custom {@link
+   * Field} sub-classes with pre-configured {@link IndexableFieldType}s.
    *
    * @param name field name
    * @param type field type
@@ -149,8 +145,7 @@ public class Field implements IndexableField {
     }
     this.name = name;
-    this.fieldsData = null;
-    this.tokenStream = tokenStream;
+    this.fieldsData = tokenStream;
     this.type = type;
   }
@@ -210,6 +205,20 @@
     if (type == null) {
       throw new IllegalArgumentException("type must not be null");
     }
+    if (type.indexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0
+        || type.storeTermVectorOffsets()) {
+      throw new IllegalArgumentException("It doesn't make sense to index offsets on binary fields");
+    }
+    if (type.indexOptions() != IndexOptions.NONE && type.tokenized()) {
+      throw new IllegalArgumentException("cannot set a BytesRef value on a tokenized field");
+    }
+    if (type.indexOptions() == IndexOptions.NONE
+        && type.pointDimensionCount() == 0
+        && type.docValuesType() == DocValuesType.NONE
+        && type.stored() == false) {
+      throw new IllegalArgumentException(
+          "it doesn't make sense to have a field that is neither indexed, nor doc-valued, nor stored");
+    }
     this.name = name;
     this.fieldsData = bytes;
     this.type = type;
@@ -237,9 +246,9 @@
     if (type == null) {
       throw new IllegalArgumentException("type must not be null");
     }
-    if (!type.stored() && type.indexOptions() == IndexOptions.NONE) {
+    if (type.stored() == false && type.indexOptions() == IndexOptions.NONE) {
       throw new IllegalArgumentException(
-          "it doesn't make sense to have a field that " + "is neither indexed nor stored");
+          "it doesn't make sense to have a field that is neither indexed nor stored");
     }
     this.name = name;
     this.fieldsData = value;
@@ -278,7 +287,7 @@
    * String value is analyzed to produce the indexed tokens.
    */
   public TokenStream tokenStreamValue() {
-    return tokenStream;
+    return fieldsData instanceof TokenStream ? (TokenStream) fieldsData : null;
   }
 
   /**
@@ -329,9 +338,6 @@
               + fieldsData.getClass().getSimpleName()
               + " to BytesRef");
     }
-    if (type.indexOptions() != IndexOptions.NONE) {
-      throw new IllegalArgumentException("cannot set a BytesRef value on an indexed field");
-    }
     if (value == null) {
       throw new IllegalArgumentException("value must not be null");
     }
@@ -392,15 +398,15 @@
     fieldsData = Double.valueOf(value);
   }
 
-  /**
-   * Expert: sets the token stream to be used for indexing and causes isIndexed() and isTokenized()
-   * to return true. May be combined with stored values from stringValue() or binaryValue()
-   */
+  /** Expert: sets the token stream to be used for indexing. */
   public void setTokenStream(TokenStream tokenStream) {
-    if (type.indexOptions() == IndexOptions.NONE || !type.tokenized()) {
-      throw new IllegalArgumentException("TokenStream fields must be indexed and tokenized");
+    if (!(fieldsData instanceof TokenStream)) {
+      throw new IllegalArgumentException(
+          "cannot change value type from "
+              + fieldsData.getClass().getSimpleName()
+              + " to TokenStream");
     }
-    this.tokenStream = tokenStream;
+    this.fieldsData = tokenStream;
   }
 
   @Override
@@ -478,8 +484,8 @@
       }
     }
 
-    if (tokenStream != null) {
-      return tokenStream;
+    if (tokenStreamValue() != null) {
+      return tokenStreamValue();
     } else if (readerValue() != null) {
       return analyzer.tokenStream(name(), readerValue());
     } else if (stringValue() != null) {

View File

@@ -23,6 +23,7 @@ import java.nio.charset.StandardCharsets;
 import org.apache.lucene.codecs.Codec;
 import org.apache.lucene.index.ByteVectorValues;
 import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexOptions;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.Term;
@@ -425,6 +426,31 @@
     }
   }
 
+  public void testBinaryStringField() throws Exception {
+    Field[] fields =
+        new Field[] {
+          new StringField("foo", new BytesRef("bar"), Field.Store.NO),
+          new StringField("foo", new BytesRef("bar"), Field.Store.YES)
+        };
+
+    for (Field field : fields) {
+      trySetByteValue(field);
+      field.setBytesValue("baz".getBytes(StandardCharsets.UTF_8));
+      assertEquals(new BytesRef("baz"), field.binaryValue());
+      field.setBytesValue(new BytesRef("baz"));
+      trySetDoubleValue(field);
+      trySetIntValue(field);
+      trySetFloatValue(field);
+      trySetLongValue(field);
+      trySetReaderValue(field);
+      trySetShortValue(field);
+      trySetStringValue(field);
+      trySetTokenStreamValue(field);
+
+      assertEquals(new BytesRef("baz"), field.binaryValue());
+    }
+  }
+
   public void testTextFieldString() throws Exception {
     Field[] fields =
         new Field[] {
@@ -442,7 +468,7 @@
       trySetReaderValue(field);
       trySetShortValue(field);
       field.setStringValue("baz");
-      field.setTokenStream(new CannedTokenStream(new Token("foo", 0, 3)));
+      trySetTokenStreamValue(field);
 
       assertEquals("baz", field.stringValue());
     }
@@ -461,7 +487,7 @@
     field.setReaderValue(new StringReader("foobar"));
     trySetShortValue(field);
     trySetStringValue(field);
-    field.setTokenStream(new CannedTokenStream(new Token("foo", 0, 3)));
+    trySetTokenStreamValue(field);
 
     assertNotNull(field.readerValue());
   }
@@ -728,4 +754,32 @@
           f.setTokenStream(new CannedTokenStream(new Token("foo", 0, 3)));
         });
   }
+
+  public void testDisabledField() {
+    // neither indexed nor stored
+    FieldType ft = new FieldType();
+    expectThrows(IllegalArgumentException.class, () -> new Field("name", "", ft));
+  }
+
+  public void testTokenizedBinaryField() {
+    FieldType ft = new FieldType();
+    ft.setTokenized(true);
+    ft.setIndexOptions(IndexOptions.DOCS);
+    expectThrows(IllegalArgumentException.class, () -> new Field("name", new BytesRef(), ft));
+  }
+
+  public void testOffsetsBinaryField() {
+    FieldType ft = new FieldType();
+    ft.setTokenized(false);
+    ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
+    expectThrows(IllegalArgumentException.class, () -> new Field("name", new BytesRef(), ft));
+  }
+
+  public void testTermVectorsOffsetsBinaryField() {
+    FieldType ft = new FieldType();
+    ft.setTokenized(false);
+    ft.setStoreTermVectors(true);
+    ft.setStoreTermVectorOffsets(true);
+    expectThrows(IllegalArgumentException.class, () -> new Field("name", new BytesRef(), ft));
+  }
 }

View File

@@ -880,17 +880,21 @@
     IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
     IndexWriter w = new IndexWriter(dir, iwc);
     Document doc = new Document();
-    FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
+    FieldType ft = new FieldType(StringField.TYPE_NOT_STORED);
     ft.setDocValuesType(DocValuesType.SORTED);
     ft.freeze();
-    Field field = new Field("test", "value", ft);
-    field.setTokenStream(
-        new TokenStream() {
-          @Override
-          public boolean incrementToken() {
-            throw new RuntimeException("no");
-          }
-        });
+    Field field =
+        new Field("test", new BytesRef("value"), ft) {
+          @Override
+          public TokenStream tokenStream(Analyzer analyzer, TokenStream reuse) {
+            return new TokenStream() {
+              @Override
+              public boolean incrementToken() throws IOException {
+                throw new RuntimeException();
+              }
+            };
+          }
+        };
     doc.add(field);
     expectThrows(
         RuntimeException.class,

View File

@@ -1166,41 +1166,44 @@
     FieldType customType = new FieldType(StoredField.TYPE);
     customType.setTokenized(true);
 
-    Field f = new Field("binary", b, 10, 17, customType);
+    final MockTokenizer field1 = new MockTokenizer(MockTokenizer.WHITESPACE, false);
+    Field f =
+        new Field("binary", b, 10, 17, customType) {
+          @Override
+          public TokenStream tokenStream(Analyzer analyzer, TokenStream reuse) {
+            return field1;
+          }
+        };
     // TODO: this is evil, changing the type after creating the field:
     customType.setIndexOptions(IndexOptions.DOCS);
-    final MockTokenizer doc1field1 = new MockTokenizer(MockTokenizer.WHITESPACE, false);
-    doc1field1.setReader(new StringReader("doc1field1"));
-    f.setTokenStream(doc1field1);
+    field1.setReader(new StringReader("doc1field1"));
 
     FieldType customType2 = new FieldType(TextField.TYPE_STORED);
-    Field f2 = newField("string", "value", customType2);
-    final MockTokenizer doc1field2 = new MockTokenizer(MockTokenizer.WHITESPACE, false);
-    doc1field2.setReader(new StringReader("doc1field2"));
-    f2.setTokenStream(doc1field2);
+    final MockTokenizer field2 = new MockTokenizer(MockTokenizer.WHITESPACE, false);
+    Field f2 =
+        new Field("string", "value", customType2) {
+          @Override
+          public TokenStream tokenStream(Analyzer analyzer, TokenStream reuse) {
+            return field2;
+          }
+        };
+    field2.setReader(new StringReader("doc1field2"));
 
     doc.add(f);
     doc.add(f2);
     w.addDocument(doc);
 
     // add 2 docs to test in-memory merging
-    final MockTokenizer doc2field1 = new MockTokenizer(MockTokenizer.WHITESPACE, false);
-    doc2field1.setReader(new StringReader("doc2field1"));
-    f.setTokenStream(doc2field1);
-    final MockTokenizer doc2field2 = new MockTokenizer(MockTokenizer.WHITESPACE, false);
-    doc2field2.setReader(new StringReader("doc2field2"));
-    f2.setTokenStream(doc2field2);
+    field1.setReader(new StringReader("doc2field1"));
+    field2.setReader(new StringReader("doc2field2"));
     w.addDocument(doc);
 
     // force segment flush so we can force a segment merge with doc3 later.
     w.commit();
-    final MockTokenizer doc3field1 = new MockTokenizer(MockTokenizer.WHITESPACE, false);
-    doc3field1.setReader(new StringReader("doc3field1"));
-    f.setTokenStream(doc3field1);
-    final MockTokenizer doc3field2 = new MockTokenizer(MockTokenizer.WHITESPACE, false);
-    doc3field2.setReader(new StringReader("doc3field2"));
-    f2.setTokenStream(doc3field2);
+    field1.setReader(new StringReader("doc3field1"));
+    field2.setReader(new StringReader("doc3field2"));
     w.addDocument(doc);
     w.commit();

View File

@@ -1497,13 +1497,13 @@
       doc.add(newStringField("id", docCount + "", Field.Store.NO));
       doc.add(newTextField("content", "silly content " + docCount, Field.Store.NO));
       if (docCount == 4) {
-        Field f = newTextField("crash", "", Field.Store.NO);
-        doc.add(f);
         MockTokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
         tokenizer.setReader(new StringReader("crash me on the 4th token"));
         tokenizer.setEnableChecks(
             false); // disable workflow checking as we forcefully close() in exceptional cases.
-        f.setTokenStream(new CrashingFilter("crash", tokenizer));
+        Field f =
+            new Field("crash", new CrashingFilter("crash", tokenizer), TextField.TYPE_NOT_STORED);
+        doc.add(f);
       }
     }
@@ -1573,13 +1573,13 @@
       doc.add(newStringField("id", docCount + "", Field.Store.NO));
       doc.add(newTextField("content", "silly content " + docCount, Field.Store.NO));
       if (docCount == crashAt) {
-        Field f = newTextField("crash", "", Field.Store.NO);
-        doc.add(f);
         MockTokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
         tokenizer.setReader(new StringReader("crash me on the 4th token"));
         tokenizer.setEnableChecks(
             false); // disable workflow checking as we forcefully close() in exceptional cases.
-        f.setTokenStream(new CrashingFilter("crash", tokenizer));
+        Field f =
+            new Field("crash", new CrashingFilter("crash", tokenizer), TextField.TYPE_NOT_STORED);
+        doc.add(f);
       }
     }

View File

@@ -568,10 +568,9 @@
     iwc.setMergePolicy(newLogMergePolicy());
     RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
     Document doc = new Document();
-    Field field = new TextField("field", "", Field.Store.NO);
     TokenStream ts = new MockTokenizer(MockTokenizer.WHITESPACE, true);
     ((Tokenizer) ts).setReader(new StringReader("here we go"));
-    field.setTokenStream(ts);
+    Field field = new Field("field", ts, TextField.TYPE_NOT_STORED);
     doc.add(field);
     writer.addDocument(doc);
     Token withPayload = new Token("withPayload", 0, 11);
@@ -601,22 +600,20 @@
     Directory dir = newDirectory();
     RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
     Document doc = new Document();
-    Field field = new TextField("field", "", Field.Store.NO);
     TokenStream ts = new MockTokenizer(MockTokenizer.WHITESPACE, true);
+    Field field = new Field("field", ts, TextField.TYPE_NOT_STORED);
     ((Tokenizer) ts).setReader(new StringReader("here we go"));
     field.setTokenStream(ts);
     doc.add(field);
-    Field field2 = new TextField("field", "", Field.Store.NO);
     Token withPayload = new Token("withPayload", 0, 11);
     withPayload.setPayload(new BytesRef("test"));
     ts = new CannedTokenStream(withPayload);
     assertTrue(ts.hasAttribute(PayloadAttribute.class));
-    field2.setTokenStream(ts);
+    Field field2 = new Field("field", ts, TextField.TYPE_NOT_STORED);
     doc.add(field2);
-    Field field3 = new TextField("field", "", Field.Store.NO);
     ts = new MockTokenizer(MockTokenizer.WHITESPACE, true);
     ((Tokenizer) ts).setReader(new StringReader("nopayload"));
-    field3.setTokenStream(ts);
+    Field field3 = new Field("field", ts, TextField.TYPE_NOT_STORED);
     doc.add(field3);
     writer.addDocument(doc);
     DirectoryReader reader = writer.getReader();

View File

@@ -47,10 +47,9 @@
     customType.setStoreTermVectorPositions(true);
     customType.setStoreTermVectorPayloads(true);
     customType.setStoreTermVectorOffsets(random().nextBoolean());
-    Field field = new Field("field", "", customType);
     TokenStream ts = new MockTokenizer(MockTokenizer.WHITESPACE, true);
     ((Tokenizer) ts).setReader(new StringReader("here we go"));
-    field.setTokenStream(ts);
+    Field field = new Field("field", ts, customType);
     doc.add(field);
     writer.addDocument(doc);
@@ -90,22 +89,19 @@
     customType.setStoreTermVectorPositions(true);
     customType.setStoreTermVectorPayloads(true);
     customType.setStoreTermVectorOffsets(random().nextBoolean());
-    Field field = new Field("field", "", customType);
     TokenStream ts = new MockTokenizer(MockTokenizer.WHITESPACE, true);
     ((Tokenizer) ts).setReader(new StringReader("here we go"));
-    field.setTokenStream(ts);
+    Field field = new Field("field", ts, customType);
     doc.add(field);
-    Field field2 = new Field("field", "", customType);
     Token withPayload = new Token("withPayload", 0, 11);
     withPayload.setPayload(new BytesRef("test"));
     ts = new CannedTokenStream(withPayload);
     assertTrue(ts.hasAttribute(PayloadAttribute.class));
-    field2.setTokenStream(ts);
+    Field field2 = new Field("field", ts, customType);
     doc.add(field2);
-    Field field3 = new Field("field", "", customType);
     ts = new MockTokenizer(MockTokenizer.WHITESPACE, true);
     ((Tokenizer) ts).setReader(new StringReader("nopayload"));
-    field3.setTokenStream(ts);
+    Field field3 = new Field("field", ts, customType);
     doc.add(field3);
     writer.addDocument(doc);
     DirectoryReader reader = writer.getReader();

View File

@@ -51,8 +51,6 @@
     BytesRef maxTerm = null;
     for (int i = 0; i < numDocs; i++) {
       Document doc = new Document();
-      Field field = new TextField("field", "", Field.Store.NO);
-      doc.add(field);
       // System.out.println(" doc " + i);
       CannedBinaryTokenStream.BinaryToken[] tokens =
           new CannedBinaryTokenStream.BinaryToken[atLeast(10)];
@@ -71,7 +69,9 @@
         }
         tokens[j] = new CannedBinaryTokenStream.BinaryToken(tokenBytes);
       }
-      field.setTokenStream(new CannedBinaryTokenStream(tokens));
+      Field field =
+          new Field("field", new CannedBinaryTokenStream(tokens), TextField.TYPE_NOT_STORED);
+      doc.add(field);
       w.addDocument(doc);
     }

View File

@@ -1530,7 +1530,8 @@
         }
       }
     }
-    if (!newType.storeTermVectorOffsets()) {
+    // Check for strings as offsets are disallowed on binary fields
+    if (value instanceof String && !newType.storeTermVectorOffsets()) {
       newType.setStoreTermVectorOffsets(random.nextBoolean());
     }