LUCENE-6653, LUCENE-6652: Refactor TermToBytesRefAttribute; add oal.analysis.tokenattributes.BytesTermAttribute; remove code duplication in tests

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1688830 13f79535-47bb-0310-9956-ffa450edef68
Uwe Schindler 2015-07-02 14:09:29 +00:00
parent 96a0dc55d5
commit 1816ed1945
40 changed files with 222 additions and 615 deletions

View File

@ -116,6 +116,10 @@ New Features
* LUCENE-6632: Geo3D: Compute circle planes more accurately.
(Karl Wright via David Smiley)
* LUCENE-6653: Added general purpose BytesTermAttribute to basic token
attributes package that can be used for TokenStreams that solely produce
binary terms. (Uwe Schindler)
API Changes
* LUCENE-6508: Simplify Lock api, there is now just
@ -295,6 +299,9 @@ Test Framework
environments (e.g., read-only dirs). If tests are running without a security
manager, an assume cancels test execution automatically. (Uwe Schindler)
* LUCENE-6652: Removed lots of useless Byte(s)TermAttributes all over test
infrastructure. (Uwe Schindler)
Changes in Backwards Compatibility Policy
* LUCENE-6553: The iterator returned by the LeafReader.postings method now
@ -305,6 +312,11 @@ Changes in Backwards Compatibility Policy
DiversifiedTopDocsCollector can be used instead with a maximum number of hits
per key equal to 1. (Adrien Grand)
* LUCENE-6653: The workflow for consuming the TermToBytesRefAttribute was changed:
getBytesRef() now does all the work and is called on each token; fillBytesRef()
was removed. The implementation is free to reuse the internal BytesRef
or return a new one on each call. (Uwe Schindler)
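A minimal sketch of the new consumer loop (illustrative only; stream, termAtt and the process call are placeholders, not part of this commit):

    TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
    stream.reset();
    while (stream.incrementToken()) {
      BytesRef bytes = termAtt.getBytesRef();  // does all the encoding work, once per token
      process(BytesRef.deepCopyOf(bytes));     // copy if the bytes must outlive this token
    }
    stream.end();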
======================= Lucene 5.2.1 =======================
Bug Fixes

View File

@ -40,11 +40,12 @@ public class CollatedTermAttributeImpl extends CharTermAttributeImpl {
}
@Override
public void fillBytesRef() {
BytesRef bytes = getBytesRef();
bytes.bytes = collator.getCollationKey(toString()).toByteArray();
bytes.offset = 0;
bytes.length = bytes.bytes.length;
public BytesRef getBytesRef() {
final BytesRef ref = this.builder.get();
ref.bytes = collator.getCollationKey(toString()).toByteArray();
ref.offset = 0;
ref.length = ref.bytes.length;
return ref;
}
}
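Note that under the new contract the collation key above is recomputed on every getBytesRef() call, so a consumer should fetch the ref once per token; a hedged sketch (termAtt and doSomethingWith are placeholders):

    while (stream.incrementToken()) {
      BytesRef key = termAtt.getBytesRef();       // collation key computed here
      doSomethingWith(BytesRef.deepCopyOf(key));  // copy if kept past this token
    }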

View File

@ -45,11 +45,12 @@ public class ICUCollatedTermAttributeImpl extends CharTermAttributeImpl {
}
@Override
public void fillBytesRef() {
BytesRef bytes = getBytesRef();
public BytesRef getBytesRef() {
collator.getRawCollationKey(toString(), key);
bytes.bytes = key.bytes;
bytes.offset = 0;
bytes.length = key.size;
final BytesRef ref = this.builder.get();
ref.bytes = key.bytes;
ref.offset = 0;
ref.length = key.size;
return ref;
}
}

View File

@ -96,9 +96,4 @@ public class MorphosyntacticTagsAttributeImpl extends AttributeImpl
this.copyTo(cloned);
return cloned;
}
@Override
public String toString() {
return tags == null ? "<no tags>" : tags.toString();
}
}

View File

@ -87,7 +87,7 @@ public class ReadTokensTask extends PerfTask {
TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
while(stream.incrementToken()) {
termAtt.fillBytesRef();
termAtt.getBytesRef();
tokenCount++;
}
stream.end();

View File

@ -976,8 +976,6 @@ public class TestPerfTasksLogic extends BenchmarkTestCase {
assertTrue(ts2.incrementToken());
BytesRef bytes1 = termAtt1.getBytesRef();
BytesRef bytes2 = termAtt2.getBytesRef();
termAtt1.fillBytesRef();
termAtt2.fillBytesRef();
assertEquals(bytes1, bytes2);
assertFalse(ts1.incrementToken());
assertFalse(ts2.incrementToken());

View File

@ -29,8 +29,6 @@ import java.util.Map;
import java.util.Set;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.Document;
@ -58,7 +56,6 @@ import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
@ -234,7 +231,7 @@ public class TestAutoPrefixTerms extends LuceneTestCase {
for(Integer term : terms) {
Document doc = new Document();
doc.add(new BinaryField("field", intToBytes(term)));
doc.add(newStringField("field", intToBytes(term), Field.Store.NO));
doc.add(new NumericDocValuesField("field", term));
w.addDocument(doc);
}
@ -506,78 +503,6 @@ public class TestAutoPrefixTerms extends LuceneTestCase {
dir.close();
}
static final class BinaryTokenStream extends TokenStream {
private final ByteTermAttribute bytesAtt = addAttribute(ByteTermAttribute.class);
private boolean available = true;
public BinaryTokenStream(BytesRef bytes) {
bytesAtt.setBytesRef(bytes);
}
@Override
public boolean incrementToken() {
if (available) {
clearAttributes();
available = false;
return true;
}
return false;
}
@Override
public void reset() {
available = true;
}
public interface ByteTermAttribute extends TermToBytesRefAttribute {
void setBytesRef(BytesRef bytes);
}
public static class ByteTermAttributeImpl extends AttributeImpl implements ByteTermAttribute,TermToBytesRefAttribute {
private BytesRef bytes;
@Override
public void fillBytesRef() {
// no-op: the bytes were already filled by our owner's incrementToken
}
@Override
public BytesRef getBytesRef() {
return bytes;
}
@Override
public void setBytesRef(BytesRef bytes) {
this.bytes = bytes;
}
@Override
public void clear() {}
@Override
public void copyTo(AttributeImpl target) {
ByteTermAttributeImpl other = (ByteTermAttributeImpl) target;
other.bytes = bytes;
}
}
}
/** Basically a StringField that accepts a binary term. */
private static class BinaryField extends Field {
final static FieldType TYPE;
static {
TYPE = new FieldType(StringField.TYPE_NOT_STORED);
// Necessary so our custom tokenStream is used by Field.tokenStream:
TYPE.setTokenized(true);
TYPE.freeze();
}
public BinaryField(String name, BytesRef value) {
super(name, new BinaryTokenStream(value), TYPE);
}
}
/** Helper class to ensure auto-prefix terms 1) never overlap one another, and 2) are used when they should be. */
private static class VerifyAutoPrefixTerms {
final FixedBitSet allHits;

View File

@ -158,17 +158,13 @@ public final class NumericTokenStream extends TokenStream {
@Override
public BytesRef getBytesRef() {
return bytes.get();
}
@Override
public void fillBytesRef() {
assert valueSize == 64 || valueSize == 32;
if (valueSize == 64) {
NumericUtils.longToPrefixCoded(value, shift, bytes);
} else {
NumericUtils.intToPrefixCoded((int) value, shift, bytes);
}
return bytes.get();
}
@Override
@ -201,8 +197,7 @@ public final class NumericTokenStream extends TokenStream {
@Override
public void reflectWith(AttributeReflector reflector) {
fillBytesRef();
reflector.reflect(TermToBytesRefAttribute.class, "bytes", bytes.toBytesRef());
reflector.reflect(TermToBytesRefAttribute.class, "bytes", getBytesRef());
reflector.reflect(NumericTermAttribute.class, "shift", shift);
reflector.reflect(NumericTermAttribute.class, "rawValue", getRawValue());
reflector.reflect(NumericTermAttribute.class, "valueSize", valueSize);
@ -219,7 +214,7 @@ public final class NumericTokenStream extends TokenStream {
NumericTermAttributeImpl t = (NumericTermAttributeImpl)super.clone();
// Do a deep clone
t.bytes = new BytesRefBuilder();
t.bytes.copyBytes(bytes.get());
t.bytes.copyBytes(getBytesRef());
return t;
}

View File

@ -105,8 +105,6 @@ public class TokenStreamToAutomaton {
final PositionLengthAttribute posLengthAtt = in.addAttribute(PositionLengthAttribute.class);
final OffsetAttribute offsetAtt = in.addAttribute(OffsetAttribute.class);
final BytesRef term = termBytesAtt.getBytesRef();
in.reset();
// Only temporarily holds states ahead of our current
@ -157,8 +155,7 @@ public class TokenStreamToAutomaton {
final int endPos = pos + posLengthAtt.getPositionLength();
termBytesAtt.fillBytesRef();
final BytesRef termUTF8 = changeToken(term);
final BytesRef termUTF8 = changeToken(termBytesAtt.getBytesRef());
int[] termUnicode = null;
final Position endPosData = positions.get(endPos);
if (endPosData.arriving == -1) {

View File

@ -0,0 +1,31 @@
package org.apache.lucene.analysis.tokenattributes;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.util.BytesRef;
/**
* This attribute can be used if you have the raw term bytes to be indexed.
* It can be used as a replacement for {@link CharTermAttribute} if binary
* terms should be indexed.
* @lucene.internal
*/
public interface BytesTermAttribute extends TermToBytesRefAttribute {
/** Sets the {@link BytesRef} of the term */
public void setBytesRef(BytesRef bytes);
}
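A sketch of the producer side (the class name here is illustrative; it mirrors the BinaryTokenStream that Field.java gains further down in this commit):

    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.tokenattributes.BytesTermAttribute;
    import org.apache.lucene.util.BytesRef;

    public final class SingleBinaryTokenStream extends TokenStream {
      private final BytesTermAttribute bytesAtt = addAttribute(BytesTermAttribute.class);
      private final BytesRef value;
      private boolean used = true;

      public SingleBinaryTokenStream(BytesRef value) {
        this.value = value;
      }

      @Override
      public boolean incrementToken() {
        if (used) {
          return false;
        }
        clearAttributes();
        bytesAtt.setBytesRef(value); // the single binary term to index
        used = true;
        return true;
      }

      @Override
      public void reset() {
        used = false; // re-arms the stream for exactly one token
      }
    }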

View File

@ -0,0 +1,65 @@
package org.apache.lucene.analysis.tokenattributes;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.AttributeReflector;
import org.apache.lucene.util.BytesRef;
/** Implementation class for {@link BytesTermAttribute}.
* @lucene.internal
*/
public class BytesTermAttributeImpl extends AttributeImpl implements BytesTermAttribute, TermToBytesRefAttribute {
private BytesRef bytes;
/** Initialize this attribute with no bytes. */
public BytesTermAttributeImpl() {}
@Override
public BytesRef getBytesRef() {
return bytes;
}
@Override
public void setBytesRef(BytesRef bytes) {
this.bytes = bytes;
}
@Override
public void clear() {
this.bytes = null;
}
@Override
public void copyTo(AttributeImpl target) {
BytesTermAttributeImpl other = (BytesTermAttributeImpl) target;
other.bytes = BytesRef.deepCopyOf(bytes);
}
@Override
public AttributeImpl clone() {
BytesTermAttributeImpl c = (BytesTermAttributeImpl)super.clone();
copyTo(c);
return c;
}
@Override
public void reflectWith(AttributeReflector reflector) {
reflector.reflect(TermToBytesRefAttribute.class, "bytes", bytes);
}
}
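As a usage sketch: Lucene's default AttributeFactory resolves the interface to this class by the FooAttribute -> FooAttributeImpl naming convention, so consumers never instantiate it directly (the byte values below are arbitrary):

    // imports: org.apache.lucene.util.AttributeSource, org.apache.lucene.util.BytesRef
    AttributeSource source = new AttributeSource(); // uses the default factory
    BytesTermAttribute att = source.addAttribute(BytesTermAttribute.class); // -> BytesTermAttributeImpl
    att.setBytesRef(new BytesRef(new byte[] { 0x12, 0x34 }));
    BytesRef term = att.getBytesRef(); // returns the instance that was set; no copy is made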

View File

@ -33,6 +33,9 @@ public class CharTermAttributeImpl extends AttributeImpl implements CharTermAttr
private char[] termBuffer = new char[ArrayUtil.oversize(MIN_BUFFER_SIZE, RamUsageEstimator.NUM_BYTES_CHAR)];
private int termLength = 0;
/** May be used by subclasses to convert to different charsets / encodings for implementing {@link #getBytesRef()}. */
protected BytesRefBuilder builder = new BytesRefBuilder();
/** Initialize this attribute with empty term text */
public CharTermAttributeImpl() {}
@ -83,16 +86,11 @@ public class CharTermAttributeImpl extends AttributeImpl implements CharTermAttr
}
// *** TermToBytesRefAttribute interface ***
private BytesRefBuilder bytes = new BytesRefBuilder();
@Override
public void fillBytesRef() {
bytes.copyChars(termBuffer, 0, termLength);
}
@Override
public BytesRef getBytesRef() {
return bytes.get();
builder.copyChars(termBuffer, 0, termLength);
return builder.get();
}
// *** CharSequence interface ***
@ -228,8 +226,8 @@ public class CharTermAttributeImpl extends AttributeImpl implements CharTermAttr
// Do a deep clone
t.termBuffer = new char[this.termLength];
System.arraycopy(this.termBuffer, 0, t.termBuffer, 0, this.termLength);
t.bytes = new BytesRefBuilder();
t.bytes.copyBytes(bytes.get());
t.builder = new BytesRefBuilder();
t.builder.copyBytes(builder.get());
return t;
}
@ -271,8 +269,7 @@ public class CharTermAttributeImpl extends AttributeImpl implements CharTermAttr
@Override
public void reflectWith(AttributeReflector reflector) {
reflector.reflect(CharTermAttribute.class, "term", toString());
fillBytesRef();
reflector.reflect(TermToBytesRefAttribute.class, "bytes", bytes.toBytesRef());
reflector.reflect(TermToBytesRefAttribute.class, "bytes", getBytesRef());
}
@Override

View File

@ -24,17 +24,12 @@ import org.apache.lucene.util.BytesRef;
* This attribute is requested by TermsHashPerField to index the contents.
* This attribute can be used to customize the final byte[] encoding of terms.
* <p>
* Consumers of this attribute call {@link #getBytesRef()} up-front, and then
* invoke {@link #fillBytesRef()} for each term. Example:
* Consumers of this attribute call {@link #getBytesRef()} for each term. Example:
* <pre class="prettyprint">
* final TermToBytesRefAttribute termAtt = tokenStream.getAttribute(TermToBytesRefAttribute.class);
* final BytesRef bytes = termAtt.getBytesRef();
*
* while (tokenStream.incrementToken()) {
*
* // you must call termAtt.fillBytesRef() before doing something with the bytes.
* // this encodes the term value (internally it might be a char[], etc) into the bytes.
* int hashCode = termAtt.fillBytesRef();
* final BytesRef bytes = termAtt.getBytesRef();
*
* if (isInteresting(bytes)) {
*
@ -42,27 +37,21 @@ import org.apache.lucene.util.BytesRef;
* // you should make a copy if you need persistent access to the bytes, otherwise they will
* // be rewritten across calls to incrementToken()
*
* doSomethingWith(new BytesRef(bytes));
* doSomethingWith(BytesRef.deepCopyOf(bytes));
* }
* }
* ...
* </pre>
* @lucene.experimental This is a very expert API, please use
* {@link CharTermAttributeImpl} and its implementation of this method
* for UTF-8 terms.
* @lucene.internal This is a very expert and internal API; please use
* {@link CharTermAttribute} and its implementation for UTF-8 terms; to
* index binary terms, use {@link BytesTermAttribute} and its implementation.
*/
public interface TermToBytesRefAttribute extends Attribute {
/**
* Updates the bytes {@link #getBytesRef()} to contain this term's
* final encoding.
*/
public void fillBytesRef();
/**
* Retrieve this attribute's BytesRef. The bytes are updated
* from the current term when the consumer calls {@link #fillBytesRef()}.
* @return this Attribute's internal BytesRef.
* Retrieve this attribute's BytesRef. The bytes are updated from the current term.
* The implementation may return a new instance or keep the previous one.
* @return a BytesRef to be indexed (only stays valid until the token stream is incremented)
*/
public BytesRef getBytesRef();
}

View File

@ -23,17 +23,14 @@ import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.NumericTokenStream;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.BytesTermAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
import org.apache.lucene.document.FieldType.NumericType;
import org.apache.lucene.index.FieldInvertState; // javadocs
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexWriter; // javadocs
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.IndexableFieldType;
import org.apache.lucene.index.StorableField;
import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.BytesRef;
/**
@ -570,71 +567,47 @@ public class Field implements IndexableField, StorableField {
}
private static final class BinaryTokenStream extends TokenStream {
private final ByteTermAttribute bytesAtt = addAttribute(ByteTermAttribute.class);
// Do not init this to true, because caller must first call reset:
private boolean available;
private final BytesTermAttribute bytesAtt = addAttribute(BytesTermAttribute.class);
private boolean used = true;
private BytesRef value;
public BinaryTokenStream() {
/** Creates a new TokenStream that returns a BytesRef as a single token.
* <p>Warning: Does not initialize the value, you must call
* {@link #setValue(BytesRef)} afterwards!
*/
BinaryTokenStream() {
}
public void setValue(BytesRef value) {
bytesAtt.setBytesRef(value);
this.value = value;
}
@Override
public boolean incrementToken() {
if (available) {
clearAttributes();
available = false;
return true;
if (used) {
return false;
}
return false;
clearAttributes();
bytesAtt.setBytesRef(value);
used = true;
return true;
}
@Override
public void reset() {
available = true;
used = false;
}
public interface ByteTermAttribute extends TermToBytesRefAttribute {
public void setBytesRef(BytesRef bytes);
}
public static class ByteTermAttributeImpl extends AttributeImpl implements ByteTermAttribute, TermToBytesRefAttribute {
private BytesRef bytes;
@Override
public void fillBytesRef() {
// no-op: the bytes were already filled by our owner's incrementToken
}
@Override
public BytesRef getBytesRef() {
return bytes;
}
@Override
public void setBytesRef(BytesRef bytes) {
this.bytes = bytes;
}
@Override
public void clear() {
}
@Override
public void copyTo(AttributeImpl target) {
ByteTermAttributeImpl other = (ByteTermAttributeImpl) target;
other.bytes = bytes;
}
@Override
public void close() {
value = null;
}
}
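A sketch of the call order the warning above implies (the class is private to Field, so this only illustrates how Field itself drives it):

    BinaryTokenStream ts = new BinaryTokenStream();
    ts.setValue(new BytesRef("binary term")); // must happen before the stream is consumed
    ts.reset();                               // re-arms the stream for one token
    ts.incrementToken();                      // emits the value via BytesTermAttribute, returns true
    ts.incrementToken();                      // single-token stream is exhausted, returns false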
private static final class StringTokenStream extends TokenStream {
private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class);
private final OffsetAttribute offsetAttribute = addAttribute(OffsetAttribute.class);
private boolean used = false;
private boolean used = true;
private String value = null;
/** Creates a new TokenStream that returns a String as a single token.

View File

@ -36,7 +36,6 @@ abstract class TermsHashPerField implements Comparable<TermsHashPerField> {
protected final DocumentsWriterPerThread.DocState docState;
protected final FieldInvertState fieldState;
TermToBytesRefAttribute termAtt;
BytesRef termBytesRef;
// Copied from our perThread
final IntBlockPool intPool;
@ -145,13 +144,10 @@ abstract class TermsHashPerField implements Comparable<TermsHashPerField> {
* entry point (for first TermsHash); postings use this
* API. */
void add() throws IOException {
termAtt.fillBytesRef();
// We are first in the chain so we must "intern" the
// term text into textStart address
// Get the text & hash of this term.
int termID = bytesHash.add(termBytesRef);
int termID = bytesHash.add(termAtt.getBytesRef());
//System.out.println("add term=" + termBytesRef.utf8ToString() + " doc=" + docState.docID + " termID=" + termID);
@ -292,10 +288,6 @@ abstract class TermsHashPerField implements Comparable<TermsHashPerField> {
* document. */
boolean start(IndexableField field, boolean first) {
termAtt = fieldState.termAttribute;
// EmptyTokenStream can have null term att
if (termAtt != null) {
termBytesRef = termAtt.getBytesRef();
}
if (nextPerField != null) {
doNextCall = nextPerField.start(field, first);
}

View File

@ -267,15 +267,13 @@ public class QueryBuilder {
*/
private Query analyzeTerm(String field, TokenStream stream) throws IOException {
TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
BytesRef bytes = termAtt.getBytesRef();
stream.reset();
if (!stream.incrementToken()) {
throw new AssertionError();
}
termAtt.fillBytesRef();
return newTermQuery(new Term(field, BytesRef.deepCopyOf(bytes)));
return newTermQuery(new Term(field, BytesRef.deepCopyOf(termAtt.getBytesRef())));
}
/**
@ -286,12 +284,10 @@ public class QueryBuilder {
q.setDisableCoord(true);
TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
BytesRef bytes = termAtt.getBytesRef();
stream.reset();
while (stream.incrementToken()) {
termAtt.fillBytesRef();
Query currentQuery = newTermQuery(new Term(field, BytesRef.deepCopyOf(bytes)));
Query currentQuery = newTermQuery(new Term(field, BytesRef.deepCopyOf(termAtt.getBytesRef())));
q.add(currentQuery, BooleanClause.Occur.SHOULD);
}
@ -317,18 +313,15 @@ public class QueryBuilder {
BooleanQuery.Builder currentQuery = newBooleanQuery(true);
TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
BytesRef bytes = termAtt.getBytesRef();
PositionIncrementAttribute posIncrAtt = stream.getAttribute(PositionIncrementAttribute.class);
stream.reset();
while (stream.incrementToken()) {
termAtt.fillBytesRef();
if (posIncrAtt.getPositionIncrement() != 0) {
add(q, currentQuery.build(), operator);
currentQuery = newBooleanQuery(true);
}
currentQuery.add(newTermQuery(new Term(field, BytesRef.deepCopyOf(bytes))), BooleanClause.Occur.SHOULD);
currentQuery.add(newTermQuery(new Term(field, BytesRef.deepCopyOf(termAtt.getBytesRef()))), BooleanClause.Occur.SHOULD);
}
add(q, currentQuery.build(), operator);
@ -343,21 +336,17 @@ public class QueryBuilder {
builder.setSlop(slop);
TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
BytesRef bytes = termAtt.getBytesRef();
PositionIncrementAttribute posIncrAtt = stream.getAttribute(PositionIncrementAttribute.class);
int position = -1;
stream.reset();
while (stream.incrementToken()) {
termAtt.fillBytesRef();
if (enablePositionIncrements) {
position += posIncrAtt.getPositionIncrement();
} else {
position += 1;
}
builder.add(new Term(field, bytes), position);
builder.add(new Term(field, termAtt.getBytesRef()), position);
}
return builder.build();
@ -371,7 +360,6 @@ public class QueryBuilder {
mpq.setSlop(slop);
TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
BytesRef bytes = termAtt.getBytesRef();
PositionIncrementAttribute posIncrAtt = stream.getAttribute(PositionIncrementAttribute.class);
int position = -1;
@ -379,7 +367,6 @@ public class QueryBuilder {
List<Term> multiTerms = new ArrayList<>();
stream.reset();
while (stream.incrementToken()) {
termAtt.fillBytesRef();
int positionIncrement = posIncrAtt.getPositionIncrement();
if (positionIncrement > 0 && multiTerms.size() > 0) {
@ -391,7 +378,7 @@ public class QueryBuilder {
multiTerms.clear();
}
position += positionIncrement;
multiTerms.add(new Term(field, BytesRef.deepCopyOf(bytes)));
multiTerms.add(new Term(field, BytesRef.deepCopyOf(termAtt.getBytesRef())));
}
if (enablePositionIncrements) {

View File

@ -17,7 +17,6 @@ package org.apache.lucene.analysis;
* limitations under the License.
*/
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.analysis.NumericTokenStream.NumericTermAttributeImpl;
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
@ -40,14 +39,12 @@ public class TestNumericTokenStream extends BaseTokenStreamTestCase {
assertNotNull(typeAtt);
final NumericTokenStream.NumericTermAttribute numericAtt = stream.getAttribute(NumericTokenStream.NumericTermAttribute.class);
assertNotNull(numericAtt);
final BytesRef bytes = bytesAtt.getBytesRef();
stream.reset();
assertEquals(64, numericAtt.getValueSize());
for (int shift=0; shift<64; shift+=NumericUtils.PRECISION_STEP_DEFAULT) {
assertTrue("New token is available", stream.incrementToken());
assertEquals("Shift value wrong", shift, numericAtt.getShift());
bytesAtt.fillBytesRef();
assertEquals("Term is incorrectly encoded", lvalue & ~((1L << shift) - 1L), NumericUtils.prefixCodedToLong(bytes));
assertEquals("Term is incorrectly encoded", lvalue & ~((1L << shift) - 1L), NumericUtils.prefixCodedToLong(bytesAtt.getBytesRef()));
assertEquals("Term raw value is incorrectly encoded", lvalue & ~((1L << shift) - 1L), numericAtt.getRawValue());
assertEquals("Type incorrect", (shift == 0) ? NumericTokenStream.TOKEN_TYPE_FULL_PREC : NumericTokenStream.TOKEN_TYPE_LOWER_PREC, typeAtt.type());
}
@ -65,14 +62,12 @@ public class TestNumericTokenStream extends BaseTokenStreamTestCase {
assertNotNull(typeAtt);
final NumericTokenStream.NumericTermAttribute numericAtt = stream.getAttribute(NumericTokenStream.NumericTermAttribute.class);
assertNotNull(numericAtt);
final BytesRef bytes = bytesAtt.getBytesRef();
stream.reset();
assertEquals(32, numericAtt.getValueSize());
for (int shift=0; shift<32; shift+=NumericUtils.PRECISION_STEP_DEFAULT) {
assertTrue("New token is available", stream.incrementToken());
assertEquals("Shift value wrong", shift, numericAtt.getShift());
bytesAtt.fillBytesRef();
assertEquals("Term is incorrectly encoded", ivalue & ~((1 << shift) - 1), NumericUtils.prefixCodedToInt(bytes));
assertEquals("Term is incorrectly encoded", ivalue & ~((1 << shift) - 1), NumericUtils.prefixCodedToInt(bytesAtt.getBytesRef()));
assertEquals("Term raw value is incorrectly encoded", ((long) ivalue) & ~((1L << shift) - 1L), numericAtt.getRawValue());
assertEquals("Type incorrect", (shift == 0) ? NumericTokenStream.TOKEN_TYPE_FULL_PREC : NumericTokenStream.TOKEN_TYPE_LOWER_PREC, typeAtt.type());
}
@ -123,6 +118,7 @@ public class TestNumericTokenStream extends BaseTokenStreamTestCase {
public void testAttributeClone() throws Exception {
NumericTermAttributeImpl att = new NumericTermAttributeImpl();
att.init(1234L, 64, 8, 0); // set some value, to make getBytesRef() work
NumericTermAttributeImpl copy = TestCharTermAttributeImpl.assertCloneIsEqual(att);
assertNotSame(att.getBytesRef(), copy.getBytesRef());
NumericTermAttributeImpl copy2 = TestCharTermAttributeImpl.assertCopyIsEqual(att);

View File

@ -1,86 +0,0 @@
package org.apache.lucene.index;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.analysis.CannedBinaryTokenStream; // javadocs
/**
* A binary TokenStream that lets you index a single
* binary token (BytesRef value).
*
* @see CannedBinaryTokenStream
*/
public final class BinaryTokenStream extends TokenStream {
private final ByteTermAttribute bytesAtt = addAttribute(ByteTermAttribute.class);
private boolean available = true;
public BinaryTokenStream(BytesRef bytes) {
bytesAtt.setBytesRef(bytes);
}
@Override
public boolean incrementToken() {
if (available) {
clearAttributes();
available = false;
return true;
}
return false;
}
@Override
public void reset() {
available = true;
}
public interface ByteTermAttribute extends TermToBytesRefAttribute {
public void setBytesRef(BytesRef bytes);
}
public static class ByteTermAttributeImpl extends AttributeImpl implements ByteTermAttribute,TermToBytesRefAttribute {
private BytesRef bytes;
@Override
public void fillBytesRef() {
// no-op: the bytes were already filled by our owner's incrementToken
}
@Override
public BytesRef getBytesRef() {
return bytes;
}
@Override
public void setBytesRef(BytesRef bytes) {
this.bytes = bytes;
}
@Override
public void clear() {}
@Override
public void copyTo(AttributeImpl target) {
ByteTermAttributeImpl other = (ByteTermAttributeImpl) target;
other.bytes = bytes;
}
}
}

View File

@ -108,11 +108,6 @@ public class Test2BTerms extends LuceneTestCase {
}
private final static class MyTermAttributeImpl extends AttributeImpl implements TermToBytesRefAttribute {
@Override
public void fillBytesRef() {
// no-op: the bytes were already filled by our owner's incrementToken
}
@Override
public BytesRef getBytesRef() {
return bytes;
@ -122,18 +117,9 @@ public class Test2BTerms extends LuceneTestCase {
public void clear() {
}
@Override
public boolean equals(Object other) {
return other == this;
}
@Override
public int hashCode() {
return System.identityHashCode(this);
}
@Override
public void copyTo(AttributeImpl target) {
throw new UnsupportedOperationException();
}
@Override

View File

@ -22,7 +22,6 @@ import java.io.IOException;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.TextField;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
@ -38,7 +37,6 @@ public class TestBinaryTerms extends LuceneTestCase {
Directory dir = newDirectory();
RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
BytesRef bytes = new BytesRef(2);
BinaryTokenStream tokenStream = new BinaryTokenStream(bytes);
for (int i = 0; i < 256; i++) {
bytes.bytes[0] = (byte) i;
@ -47,8 +45,8 @@ public class TestBinaryTerms extends LuceneTestCase {
Document doc = new Document();
FieldType customType = new FieldType();
customType.setStored(true);
doc.add(new Field("id", "" + i, customType));
doc.add(new TextField("bytes", tokenStream));
doc.add(newField("id", "" + i, customType));
doc.add(newStringField("bytes", bytes, Field.Store.NO));
iw.addDocument(doc);
}

View File

@ -49,14 +49,13 @@ public class TestLongPostings extends LuceneTestCase {
}
try (TokenStream ts = a.tokenStream("foo", s)) {
final TermToBytesRefAttribute termAtt = ts.getAttribute(TermToBytesRefAttribute.class);
final BytesRef termBytes = termAtt.getBytesRef();
ts.reset();
int count = 0;
boolean changed = false;
while(ts.incrementToken()) {
termAtt.fillBytesRef();
final BytesRef termBytes = termAtt.getBytesRef();
if (count == 0 && !termBytes.utf8ToString().equals(s)) {
// The value was changed during analysis. Keep iterating so the
// tokenStream is exhausted.

View File

@ -23,17 +23,12 @@ import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.StringHelper;
@ -94,78 +89,6 @@ public class TestPrefixQuery extends LuceneTestCase {
directory.close();
}
static final class BinaryTokenStream extends TokenStream {
private final ByteTermAttribute bytesAtt = addAttribute(ByteTermAttribute.class);
private boolean available = true;
public BinaryTokenStream(BytesRef bytes) {
bytesAtt.setBytesRef(bytes);
}
@Override
public boolean incrementToken() {
if (available) {
clearAttributes();
available = false;
return true;
}
return false;
}
@Override
public void reset() {
available = true;
}
public interface ByteTermAttribute extends TermToBytesRefAttribute {
public void setBytesRef(BytesRef bytes);
}
public static class ByteTermAttributeImpl extends AttributeImpl implements ByteTermAttribute,TermToBytesRefAttribute {
private BytesRef bytes;
@Override
public void fillBytesRef() {
// no-op: the bytes were already filled by our owner's incrementToken
}
@Override
public BytesRef getBytesRef() {
return bytes;
}
@Override
public void setBytesRef(BytesRef bytes) {
this.bytes = bytes;
}
@Override
public void clear() {}
@Override
public void copyTo(AttributeImpl target) {
ByteTermAttributeImpl other = (ByteTermAttributeImpl) target;
other.bytes = bytes;
}
}
}
/** Basically a StringField that accepts a binary term. */
private static class BinaryField extends Field {
final static FieldType TYPE;
static {
TYPE = new FieldType(StringField.TYPE_NOT_STORED);
// Necessary so our custom tokenStream is used by Field.tokenStream:
TYPE.setTokenized(true);
TYPE.freeze();
}
public BinaryField(String name, BytesRef value) {
super(name, new BinaryTokenStream(value), TYPE);
}
}
public void testRandomBinaryPrefix() throws Exception {
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
@ -182,7 +105,7 @@ public class TestPrefixQuery extends LuceneTestCase {
Collections.shuffle(termsList, random());
for(BytesRef term : termsList) {
Document doc = new Document();
doc.add(new BinaryField("field", term));
doc.add(newStringField("field", term, Field.Store.NO));
w.addDocument(doc);
}

View File

@ -167,14 +167,11 @@ public abstract class AbstractTestCase extends LuceneTestCase {
try (TokenStream tokenStream = analyzer.tokenStream(field, text)) {
TermToBytesRefAttribute termAttribute = tokenStream.getAttribute(TermToBytesRefAttribute.class);
BytesRef bytesRef = termAttribute.getBytesRef();
tokenStream.reset();
while (tokenStream.incrementToken()) {
termAttribute.fillBytesRef();
bytesRefs.add(BytesRef.deepCopyOf(bytesRef));
bytesRefs.add(BytesRef.deepCopyOf(termAttribute.getBytesRef()));
}
tokenStream.end();

View File

@ -453,18 +453,16 @@ public class MemoryIndex {
PositionIncrementAttribute posIncrAttribute = stream.addAttribute(PositionIncrementAttribute.class);
OffsetAttribute offsetAtt = stream.addAttribute(OffsetAttribute.class);
PayloadAttribute payloadAtt = storePayloads ? stream.addAttribute(PayloadAttribute.class) : null;
BytesRef ref = termAtt.getBytesRef();
stream.reset();
while (stream.incrementToken()) {
termAtt.fillBytesRef();
// if (DEBUG) System.err.println("token='" + term + "'");
numTokens++;
final int posIncr = posIncrAttribute.getPositionIncrement();
if (posIncr == 0)
numOverlapTokens++;
pos += posIncr;
int ord = terms.add(ref);
int ord = terms.add(termAtt.getBytesRef());
if (ord < 0) {
ord = (-ord) - 1;
postingsWriter.reset(sliceArray.end[ord]);

View File

@ -612,15 +612,14 @@ public abstract class QueryParserBase extends QueryBuilder implements CommonQuer
source.reset();
TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class);
BytesRef bytes = termAtt.getBytesRef();
if (!source.incrementToken())
throw new IllegalArgumentException("analyzer returned no terms for multiTerm term: " + part);
termAtt.fillBytesRef();
BytesRef bytes = BytesRef.deepCopyOf(termAtt.getBytesRef());
if (source.incrementToken())
throw new IllegalArgumentException("analyzer returned too many terms for multiTerm term: " + part);
source.end();
return BytesRef.deepCopyOf(bytes);
return bytes;
} catch (IOException e) {
throw new RuntimeException("Error analyzing multiTerm term: " + part, e);
}

View File

@ -53,11 +53,9 @@ public class SpanOrTermsBuilder extends SpanBuilderBase {
try (TokenStream ts = analyzer.tokenStream(fieldName, value)) {
TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
BytesRef bytes = termAtt.getBytesRef();
ts.reset();
while (ts.incrementToken()) {
termAtt.fillBytesRef();
SpanTermQuery stq = new SpanTermQuery(new Term(fieldName, BytesRef.deepCopyOf(bytes)));
SpanTermQuery stq = new SpanTermQuery(new Term(fieldName, BytesRef.deepCopyOf(termAtt.getBytesRef())));
clausesList.add(stq);
}
ts.end();

View File

@ -55,11 +55,9 @@ public class TermsQueryBuilder implements QueryBuilder {
try (TokenStream ts = analyzer.tokenStream(fieldName, text)) {
TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
Term term = null;
BytesRef bytes = termAtt.getBytesRef();
ts.reset();
while (ts.incrementToken()) {
termAtt.fillBytesRef();
term = new Term(fieldName, BytesRef.deepCopyOf(bytes));
term = new Term(fieldName, BytesRef.deepCopyOf(termAtt.getBytesRef()));
bq.add(new BooleanClause(new TermQuery(term), BooleanClause.Occur.SHOULD));
}
ts.end();

View File

@ -61,8 +61,6 @@ public class TokenStreamToTermAutomatonQuery {
final PositionLengthAttribute posLengthAtt = in.addAttribute(PositionLengthAttribute.class);
final OffsetAttribute offsetAtt = in.addAttribute(OffsetAttribute.class);
final BytesRef term = termBytesAtt.getBytesRef();
in.reset();
TermAutomatonQuery query = new TermAutomatonQuery(field);
@ -93,7 +91,7 @@ public class TokenStreamToTermAutomatonQuery {
state = query.createState();
}
termBytesAtt.fillBytesRef();
BytesRef term = termBytesAtt.getBytesRef();
//System.out.println(pos + "-" + endPos + ": " + term.utf8ToString() + ": posInc=" + posInc);
if (term.length == 1 && term.bytes[term.offset] == (byte) '*') {
query.addAnyTransition(pos, endPos);

View File

@ -20,11 +20,7 @@ package org.apache.lucene.spatial.prefix;
import java.io.IOException;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
import org.apache.lucene.util.Attribute;
import org.apache.lucene.util.AttributeFactory;
import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.analysis.tokenattributes.BytesTermAttribute;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefIterator;
@ -37,85 +33,6 @@ import org.apache.lucene.util.BytesRefIterator;
*/
class BytesRefIteratorTokenStream extends TokenStream {
// just a wrapper to prevent adding CharTermAttribute
private static final class BRAttributeFactory extends AttributeFactory {
private final AttributeFactory delegate;
BRAttributeFactory(AttributeFactory delegate) {
this.delegate = delegate;
}
@Override
public AttributeImpl createAttributeInstance(Class<? extends Attribute> attClass) {
if (CharTermAttribute.class.isAssignableFrom(attClass))
throw new IllegalArgumentException(getClass() + " does not support CharTermAttribute.");
return delegate.createAttributeInstance(attClass);
}
}
private static final class BRTermToBytesRefAttributeImpl extends AttributeImpl
implements TermToBytesRefAttribute {
private final BytesRef bytes = new BytesRef();
void setBytesRef(BytesRef inputBytes) {
// shallow clone. this.bytesRef is final
bytes.bytes = inputBytes.bytes;
bytes.offset = inputBytes.offset;
bytes.length = inputBytes.length;
}
@Override
public void clear() {
// we keep it untouched as it's fully controlled by the outer class.
}
@Override
public void copyTo(AttributeImpl target) {
final BRTermToBytesRefAttributeImpl a = (BRTermToBytesRefAttributeImpl) target;
a.setBytesRef(BytesRef.deepCopyOf(bytes));
}
@Override
public void fillBytesRef() {
//nothing to do; it's populated by incrementToken
}
@Override
public BytesRef getBytesRef() {
return bytes;
}
@Override
public BRTermToBytesRefAttributeImpl clone() {
// super.clone won't work since we need a new BytesRef reference and it's nice to have it final. The superclass
// has no state to copy anyway.
final BRTermToBytesRefAttributeImpl clone = new BRTermToBytesRefAttributeImpl();
clone.setBytesRef(BytesRef.deepCopyOf(bytes));
return clone;
}
@Override
public int hashCode() {
return bytes.hashCode();
}
@Override
public boolean equals(Object obj) {
if (this == obj) return true;
if (obj == null) return false;
if (getClass() != obj.getClass()) return false;
BRTermToBytesRefAttributeImpl other = (BRTermToBytesRefAttributeImpl) obj;
if (!bytes.equals(other.bytes)) return false;
return true;
}
}
public BytesRefIteratorTokenStream() {
super(new BRAttributeFactory(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY));
addAttributeImpl(new BRTermToBytesRefAttributeImpl());//because non-public constructor
bytesAtt = (BRTermToBytesRefAttributeImpl) addAttribute(TermToBytesRefAttribute.class);
}
public BytesRefIterator getBytesRefIterator() {
return bytesIter;
}
@ -129,7 +46,6 @@ class BytesRefIteratorTokenStream extends TokenStream {
public void reset() throws IOException {
if (bytesIter == null)
throw new IllegalStateException("call setBytesRefIterator() before usage");
bytesAtt.getBytesRef().length = 0;
}
@Override
@ -137,14 +53,12 @@ class BytesRefIteratorTokenStream extends TokenStream {
if (bytesIter == null)
throw new IllegalStateException("call setBytesRefIterator() before usage");
// this will only clear all other attributes in this TokenStream
clearAttributes();//TODO but there should be no "other" attributes
// get next
BytesRef bytes = bytesIter.next();
if (bytes == null) {
return false;
} else {
clearAttributes();
bytesAtt.setBytesRef(bytes);
//note: we don't bother setting posInc or type attributes. There's no point to it.
return true;
@ -152,7 +66,7 @@ class BytesRefIteratorTokenStream extends TokenStream {
}
//members
private final BRTermToBytesRefAttributeImpl bytesAtt;
private final BytesTermAttribute bytesAtt = addAttribute(BytesTermAttribute.class);
private BytesRefIterator bytesIter = null; // null means not initialized
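A sketch of how the rewritten stream is driven (the class is package-private; setBytesRefIterator is implied by the checks above, and bytesIterator is a placeholder):

    BytesRefIteratorTokenStream ts = new BytesRefIteratorTokenStream();
    ts.setBytesRefIterator(bytesIterator); // must be set before reset()/incrementToken()
    ts.reset();
    TermToBytesRefAttribute termAtt = ts.getAttribute(TermToBytesRefAttribute.class);
    while (ts.incrementToken()) {
      BytesRef term = termAtt.getBytesRef(); // one term per BytesRef from the iterator
    }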

View File

@ -486,11 +486,10 @@ public class FreeTextSuggester extends Lookup implements Accountable {
// Run full analysis, but save only the
// last 1gram, last 2gram, etc.:
BytesRef tokenBytes = termBytesAtt.getBytesRef();
int maxEndOffset = -1;
boolean sawRealToken = false;
while(ts.incrementToken()) {
termBytesAtt.fillBytesRef();
BytesRef tokenBytes = termBytesAtt.getBytesRef();
sawRealToken |= tokenBytes.length > 0;
// TODO: this is somewhat iffy; today, ShingleFilter
// sets posLen to the gram count; maybe we should make

View File

@ -58,7 +58,7 @@ public final class CompletionTokenStream extends TokenStream {
private final PayloadAttribute payloadAttr = addAttribute(PayloadAttribute.class);
private final PositionIncrementAttribute posAttr = addAttribute(PositionIncrementAttribute.class);
private final ByteTermAttribute bytesAtt = addAttribute(ByteTermAttribute.class);
private final BytesRefBuilderTermAttribute bytesAtt = addAttribute(BytesRefBuilderTermAttribute.class);
private final TokenStream input;
final boolean preserveSep;
@ -309,9 +309,7 @@ public final class CompletionTokenStream extends TokenStream {
/**
* Attribute providing access to the term builder and UTF-16 conversion
*/
private interface ByteTermAttribute extends TermToBytesRefAttribute {
// marker interface
private interface BytesRefBuilderTermAttribute extends TermToBytesRefAttribute {
/**
* Returns the builder from which the term is derived.
*/
@ -326,20 +324,15 @@ public final class CompletionTokenStream extends TokenStream {
/**
* Custom attribute implementation for completion token stream
*/
public static final class ByteTermAttributeImpl extends AttributeImpl implements ByteTermAttribute, TermToBytesRefAttribute {
public static final class BytesRefBuilderTermAttributeImpl extends AttributeImpl implements BytesRefBuilderTermAttribute, TermToBytesRefAttribute {
private final BytesRefBuilder bytes = new BytesRefBuilder();
private CharsRefBuilder charsRef;
private transient CharsRefBuilder charsRef;
/**
* Sole constructor
* no-op
*/
public ByteTermAttributeImpl() {
}
@Override
public void fillBytesRef() {
// does nothing - we change in place
public BytesRefBuilderTermAttributeImpl() {
}
@Override
@ -359,10 +352,17 @@ public final class CompletionTokenStream extends TokenStream {
@Override
public void copyTo(AttributeImpl target) {
ByteTermAttributeImpl other = (ByteTermAttributeImpl) target;
BytesRefBuilderTermAttributeImpl other = (BytesRefBuilderTermAttributeImpl) target;
other.bytes.copyBytes(bytes);
}
@Override
public AttributeImpl clone() {
BytesRefBuilderTermAttributeImpl other = new BytesRefBuilderTermAttributeImpl();
copyTo(other);
return other;
}
@Override
public CharSequence toUTF16() {
if (charsRef == null) {

View File

@ -51,7 +51,6 @@ import org.apache.lucene.document.Document;
import org.apache.lucene.search.suggest.Lookup.LookupResult;
import org.apache.lucene.search.suggest.Input;
import org.apache.lucene.search.suggest.InputArrayIterator;
import org.apache.lucene.util.AttributeFactory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LineFileDocs;

View File

@ -17,13 +17,11 @@ package org.apache.lucene.analysis;
* limitations under the License.
*/
import org.apache.lucene.analysis.tokenattributes.BytesTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
/**
* TokenStream from a canned list of binary (BytesRef-based)
@ -54,65 +52,11 @@ public final class CannedBinaryTokenStream extends TokenStream {
private final BinaryToken[] tokens;
private int upto = 0;
private final BinaryTermAttribute termAtt = addAttribute(BinaryTermAttribute.class);
private final BytesTermAttribute termAtt = addAttribute(BytesTermAttribute.class);
private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
private final PositionLengthAttribute posLengthAtt = addAttribute(PositionLengthAttribute.class);
private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
/** An attribute extending {@link
* TermToBytesRefAttribute} but exposing a {@link
* #setBytesRef} method. */
public interface BinaryTermAttribute extends TermToBytesRefAttribute {
/** Set the current binary value. */
public void setBytesRef(BytesRef bytes);
}
/** Implementation for {@link BinaryTermAttribute}. */
public final static class BinaryTermAttributeImpl extends AttributeImpl implements BinaryTermAttribute, TermToBytesRefAttribute {
private final BytesRefBuilder bytes = new BytesRefBuilder();
@Override
public void fillBytesRef() {
bytes.get(); // sets the length on the bytesref
}
@Override
public BytesRef getBytesRef() {
return bytes.get();
}
@Override
public void setBytesRef(BytesRef bytes) {
this.bytes.copyBytes(bytes);
}
@Override
public void clear() {
}
@Override
public boolean equals(Object other) {
return other == this;
}
@Override
public int hashCode() {
return System.identityHashCode(this);
}
@Override
public void copyTo(AttributeImpl target) {
BinaryTermAttributeImpl other = (BinaryTermAttributeImpl) target;
other.bytes.copyBytes(bytes);
}
@Override
public BinaryTermAttributeImpl clone() {
throw new UnsupportedOperationException();
}
}
public CannedBinaryTokenStream(BinaryToken... tokens) {
super();
this.tokens = tokens;

View File

@ -183,12 +183,10 @@ public abstract class CollationTestBase extends LuceneTestCase {
String term = TestUtil.randomSimpleString(random());
try (TokenStream ts = analyzer.tokenStream("fake", term)) {
TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
BytesRef bytes = termAtt.getBytesRef();
ts.reset();
assertTrue(ts.incrementToken());
termAtt.fillBytesRef();
// ensure we make a copy of the actual bytes too
map.put(term, BytesRef.deepCopyOf(bytes));
map.put(term, BytesRef.deepCopyOf(termAtt.getBytesRef()));
assertFalse(ts.incrementToken());
ts.end();
}
@ -205,11 +203,9 @@ public abstract class CollationTestBase extends LuceneTestCase {
BytesRef expected = mapping.getValue();
try (TokenStream ts = analyzer.tokenStream("fake", term)) {
TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
BytesRef bytes = termAtt.getBytesRef();
ts.reset();
assertTrue(ts.incrementToken());
termAtt.fillBytesRef();
assertEquals(expected, bytes);
assertEquals(expected, termAtt.getBytesRef());
assertFalse(ts.incrementToken());
ts.end();
}

View File

@ -34,11 +34,11 @@ public class MockUTF16TermAttributeImpl extends CharTermAttributeImpl {
AttributeFactory.getStaticImplementation(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, MockUTF16TermAttributeImpl.class);
@Override
public void fillBytesRef() {
BytesRef bytes = getBytesRef();
byte[] utf16 = toString().getBytes(StandardCharsets.UTF_16LE);
bytes.bytes = utf16;
bytes.offset = 0;
bytes.length = utf16.length;
public BytesRef getBytesRef() {
final BytesRef ref = this.builder.get();
ref.bytes = toString().getBytes(StandardCharsets.UTF_16LE);
ref.offset = 0;
ref.length = ref.bytes.length;
return ref;
}
}

View File

@ -252,16 +252,16 @@ public class ICUCollationField extends FieldType {
source.reset();
TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class);
BytesRef bytes = termAtt.getBytesRef();
// we control the analyzer here: most errors are impossible
if (!source.incrementToken())
throw new IllegalArgumentException("analyzer returned no terms for text: " + text);
termAtt.fillBytesRef();
BytesRef bytes = BytesRef.deepCopyOf(termAtt.getBytesRef());
assert !source.incrementToken();
source.end();
return BytesRef.deepCopyOf(bytes);
return bytes;
} catch (IOException e) {
throw new RuntimeException("Unable to analyze text: " + text, e);
}

View File

@ -148,13 +148,11 @@ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase {
try (TokenStream tokenStream = analyzer.tokenStream("", query)){
final Set<BytesRef> tokens = new HashSet<>();
final TermToBytesRefAttribute bytesAtt = tokenStream.getAttribute(TermToBytesRefAttribute.class);
final BytesRef bytes = bytesAtt.getBytesRef();
tokenStream.reset();
while (tokenStream.incrementToken()) {
bytesAtt.fillBytesRef();
tokens.add(BytesRef.deepCopyOf(bytes));
tokens.add(BytesRef.deepCopyOf(bytesAtt.getBytesRef()));
}
tokenStream.end();
@ -246,7 +244,6 @@ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase {
final NamedList<Object> tokenNamedList = new SimpleOrderedMap<>();
final TermToBytesRefAttribute termAtt = token.getAttribute(TermToBytesRefAttribute.class);
BytesRef rawBytes = termAtt.getBytesRef();
termAtt.fillBytesRef();
final String text = fieldType.indexedToReadable(rawBytes, new CharsRefBuilder()).toString();
tokenNamedList.add("text", text);

View File

@ -224,16 +224,14 @@ public class CollationField extends FieldType {
try (TokenStream source = analyzer.tokenStream(field, text)) {
source.reset();
TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class);
BytesRef bytes = termAtt.getBytesRef();
// we control the analyzer here: most errors are impossible
if (!source.incrementToken())
throw new IllegalArgumentException("analyzer returned no terms for text: " + text);
termAtt.fillBytesRef();
BytesRef bytes = BytesRef.deepCopyOf(termAtt.getBytesRef());
assert !source.incrementToken();
source.end();
return BytesRef.deepCopyOf(bytes);
return bytes;
} catch (IOException e) {
throw new RuntimeException("Unable to analyze text: " + text, e);
}

View File

@ -146,16 +146,15 @@ public class TextField extends FieldType {
source.reset();
TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class);
BytesRef bytes = termAtt.getBytesRef();
if (!source.incrementToken())
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,"analyzer returned no terms for multiTerm term: " + part);
termAtt.fillBytesRef();
BytesRef bytes = BytesRef.deepCopyOf(termAtt.getBytesRef());
if (source.incrementToken())
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,"analyzer returned too many terms for multiTerm term: " + part);
source.end();
return BytesRef.deepCopyOf(bytes);
return bytes;
} catch (IOException e) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,"error analyzing range part: " + part, e);
}

View File

@ -267,10 +267,9 @@ public class CursorMarkTest extends SolrTestCaseJ4 {
String term = TestUtil.randomRealisticUnicodeString(random());
try (TokenStream ts = analyzer.tokenStream("fake", term)) {
TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
val = termAtt.getBytesRef();
ts.reset();
assertTrue(ts.incrementToken());
termAtt.fillBytesRef();
val = BytesRef.deepCopyOf(termAtt.getBytesRef());
assertFalse(ts.incrementToken());
ts.end();
}