LUCENE-6653, LUCENE-6652: Refactor TermToBytesRefAttribute; add oal.analysis.tokenattributes.BytesTermAttribute; remove code duplication in tests

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1688830 13f79535-47bb-0310-9956-ffa450edef68
Uwe Schindler 2015-07-02 14:09:29 +00:00
parent 96a0dc55d5
commit 1816ed1945
40 changed files with 222 additions and 615 deletions


@@ -116,6 +116,10 @@ New Features
 * LUCENE-6632: Geo3D: Compute circle planes more accurately.
   (Karl Wright via David Smiley)
 
+* LUCENE-6653: Added general purpose BytesTermAttribute to basic token
+  attributes package that can be used for TokenStreams that solely produce
+  binary terms. (Uwe Schindler)
+
 API Changes
 
 * LUCENE-6508: Simplify Lock api, there is now just

@@ -295,6 +299,9 @@ Test Framework
   environments (e.g., read-only dirs). If tests are running without a security
   manager, an assume cancels test execution automatically. (Uwe Schindler)
 
+* LUCENE-6652: Removed lots of useless Byte(s)TermAttributes all over test
+  infrastructure. (Uwe Schindler)
+
 Changes in Backwards Compatibility Policy
 
 * LUCENE-6553: The iterator returned by the LeafReader.postings method now

@@ -305,6 +312,11 @@ Changes in Backwards Compatibility Policy
   DiversifiedTopDocsCollector can be used instead with a maximum number of hits
   per key equal to 1. (Adrien Grand)
 
+* LUCENE-6653: The workflow for consuming the TermToBytesRefAttribute was changed:
+  getBytesRef() now does all work and is called on each token, fillBytesRef()
+  was removed. The implementation is free to reuse the internal BytesRef
+  or return a new one on each call. (Uwe Schindler)
+
 ======================= Lucene 5.2.1 =======================
 
 Bug Fixes
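The LUCENE-6653 entry above changes the consumer-side loop. A minimal sketch of the new workflow (the analyzer, field name, and text below are placeholders, not from this commit):

    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
    import org.apache.lucene.util.BytesRef;

    // Before: getBytesRef() once up-front, then fillBytesRef() per token.
    // After:  getBytesRef() does all the work and is called per token.
    try (TokenStream stream = analyzer.tokenStream("body", "some text")) {
      TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
      stream.reset();
      while (stream.incrementToken()) {
        // The returned BytesRef is only valid until the next incrementToken();
        // deep-copy it if it must outlive this iteration.
        BytesRef term = BytesRef.deepCopyOf(termAtt.getBytesRef());
        // ... use term ...
      }
      stream.end();
    }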


@@ -40,11 +40,12 @@ public class CollatedTermAttributeImpl extends CharTermAttributeImpl {
   }
 
   @Override
-  public void fillBytesRef() {
-    BytesRef bytes = getBytesRef();
-    bytes.bytes = collator.getCollationKey(toString()).toByteArray();
-    bytes.offset = 0;
-    bytes.length = bytes.bytes.length;
+  public BytesRef getBytesRef() {
+    final BytesRef ref = this.builder.get();
+    ref.bytes = collator.getCollationKey(toString()).toByteArray();
+    ref.offset = 0;
+    ref.length = ref.bytes.length;
+    return ref;
   }
 
 }


@@ -45,11 +45,12 @@ public class ICUCollatedTermAttributeImpl extends CharTermAttributeImpl {
   }
 
   @Override
-  public void fillBytesRef() {
-    BytesRef bytes = getBytesRef();
-    collator.getRawCollationKey(toString(), key);
-    bytes.bytes = key.bytes;
-    bytes.offset = 0;
-    bytes.length = key.size;
+  public BytesRef getBytesRef() {
+    collator.getRawCollationKey(toString(), key);
+    final BytesRef ref = this.builder.get();
+    ref.bytes = key.bytes;
+    ref.offset = 0;
+    ref.length = key.size;
+    return ref;
   }
 
 }


@@ -96,9 +96,4 @@ public class MorphosyntacticTagsAttributeImpl extends AttributeImpl
     this.copyTo(cloned);
     return cloned;
   }
-
-  @Override
-  public String toString() {
-    return tags == null ? "<no tags>" : tags.toString();
-  }
 }


@@ -87,7 +87,7 @@ public class ReadTokensTask extends PerfTask {
       TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
       while(stream.incrementToken()) {
-        termAtt.fillBytesRef();
+        termAtt.getBytesRef();
         tokenCount++;
       }
       stream.end();


@@ -976,8 +976,6 @@ public class TestPerfTasksLogic extends BenchmarkTestCase {
       assertTrue(ts2.incrementToken());
       BytesRef bytes1 = termAtt1.getBytesRef();
       BytesRef bytes2 = termAtt2.getBytesRef();
-      termAtt1.fillBytesRef();
-      termAtt2.fillBytesRef();
       assertEquals(bytes1, bytes2);
       assertFalse(ts1.incrementToken());
       assertFalse(ts2.incrementToken());


@@ -29,8 +29,6 @@ import java.util.Map;
 import java.util.Set;
 
 import org.apache.lucene.analysis.MockAnalyzer;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
 import org.apache.lucene.codecs.Codec;
 import org.apache.lucene.document.BinaryDocValuesField;
 import org.apache.lucene.document.Document;
@@ -58,7 +56,6 @@ import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.PrefixQuery;
 import org.apache.lucene.search.TermRangeQuery;
 import org.apache.lucene.store.Directory;
-import org.apache.lucene.util.AttributeImpl;
 import org.apache.lucene.util.AttributeSource;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.FixedBitSet;
@@ -234,7 +231,7 @@ public class TestAutoPrefixTerms extends LuceneTestCase {
     for(Integer term : terms) {
       Document doc = new Document();
-      doc.add(new BinaryField("field", intToBytes(term)));
+      doc.add(newStringField("field", intToBytes(term), Field.Store.NO));
       doc.add(new NumericDocValuesField("field", term));
       w.addDocument(doc);
     }
@@ -506,78 +503,6 @@ public class TestAutoPrefixTerms extends LuceneTestCase {
     dir.close();
   }
 
-  static final class BinaryTokenStream extends TokenStream {
-    private final ByteTermAttribute bytesAtt = addAttribute(ByteTermAttribute.class);
-    private boolean available = true;
-
-    public BinaryTokenStream(BytesRef bytes) {
-      bytesAtt.setBytesRef(bytes);
-    }
-
-    @Override
-    public boolean incrementToken() {
-      if (available) {
-        clearAttributes();
-        available = false;
-        return true;
-      }
-      return false;
-    }
-
-    @Override
-    public void reset() {
-      available = true;
-    }
-
-    public interface ByteTermAttribute extends TermToBytesRefAttribute {
-      void setBytesRef(BytesRef bytes);
-    }
-
-    public static class ByteTermAttributeImpl extends AttributeImpl implements ByteTermAttribute,TermToBytesRefAttribute {
-      private BytesRef bytes;
-
-      @Override
-      public void fillBytesRef() {
-        // no-op: the bytes was already filled by our owner's incrementToken
-      }
-
-      @Override
-      public BytesRef getBytesRef() {
-        return bytes;
-      }
-
-      @Override
-      public void setBytesRef(BytesRef bytes) {
-        this.bytes = bytes;
-      }
-
-      @Override
-      public void clear() {}
-
-      @Override
-      public void copyTo(AttributeImpl target) {
-        ByteTermAttributeImpl other = (ByteTermAttributeImpl) target;
-        other.bytes = bytes;
-      }
-    }
-  }
-
-  /** Basically a StringField that accepts binary term. */
-  private static class BinaryField extends Field {
-    final static FieldType TYPE;
-    static {
-      TYPE = new FieldType(StringField.TYPE_NOT_STORED);
-      // Necessary so our custom tokenStream is used by Field.tokenStream:
-      TYPE.setTokenized(true);
-      TYPE.freeze();
-    }
-
-    public BinaryField(String name, BytesRef value) {
-      super(name, new BinaryTokenStream(value), TYPE);
-    }
-  }
-
   /** Helper class to ensure auto-prefix terms 1) never overlap one another, and 2) are used when they should be. */
   private static class VerifyAutoPrefixTerms {
     final FixedBitSet allHits;


@@ -158,17 +158,13 @@ public final class NumericTokenStream extends TokenStream {
     @Override
     public BytesRef getBytesRef() {
-      return bytes.get();
-    }
-
-    @Override
-    public void fillBytesRef() {
       assert valueSize == 64 || valueSize == 32;
       if (valueSize == 64) {
         NumericUtils.longToPrefixCoded(value, shift, bytes);
       } else {
         NumericUtils.intToPrefixCoded((int) value, shift, bytes);
       }
+      return bytes.get();
     }
 
     @Override
@@ -201,8 +197,7 @@ public final class NumericTokenStream extends TokenStream {
     @Override
     public void reflectWith(AttributeReflector reflector) {
-      fillBytesRef();
-      reflector.reflect(TermToBytesRefAttribute.class, "bytes", bytes.toBytesRef());
+      reflector.reflect(TermToBytesRefAttribute.class, "bytes", getBytesRef());
       reflector.reflect(NumericTermAttribute.class, "shift", shift);
       reflector.reflect(NumericTermAttribute.class, "rawValue", getRawValue());
       reflector.reflect(NumericTermAttribute.class, "valueSize", valueSize);
@@ -219,7 +214,7 @@ public final class NumericTokenStream extends TokenStream {
       NumericTermAttributeImpl t = (NumericTermAttributeImpl)super.clone();
       // Do a deep clone
       t.bytes = new BytesRefBuilder();
-      t.bytes.copyBytes(bytes.get());
+      t.bytes.copyBytes(getBytesRef());
       return t;
     }


@@ -105,8 +105,6 @@ public class TokenStreamToAutomaton {
     final PositionLengthAttribute posLengthAtt = in.addAttribute(PositionLengthAttribute.class);
     final OffsetAttribute offsetAtt = in.addAttribute(OffsetAttribute.class);
-    final BytesRef term = termBytesAtt.getBytesRef();
-
     in.reset();
 
     // Only temporarily holds states ahead of our current
@@ -157,8 +155,7 @@ public class TokenStreamToAutomaton {
       final int endPos = pos + posLengthAtt.getPositionLength();
 
-      termBytesAtt.fillBytesRef();
-      final BytesRef termUTF8 = changeToken(term);
+      final BytesRef termUTF8 = changeToken(termBytesAtt.getBytesRef());
       int[] termUnicode = null;
       final Position endPosData = positions.get(endPos);
       if (endPosData.arriving == -1) {


@@ -0,0 +1,31 @@
+package org.apache.lucene.analysis.tokenattributes;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.util.BytesRef;
+
+/**
+ * This attribute can be used if you have the raw term bytes to be indexed.
+ * It can be used as replacement for {@link CharTermAttribute}, if binary
+ * terms should be indexed.
+ * @lucene.internal
+ */
+public interface BytesTermAttribute extends TermToBytesRefAttribute {
+  /** Sets the {@link BytesRef} of the term */
+  public void setBytesRef(BytesRef bytes);
+}


@@ -0,0 +1,65 @@
+package org.apache.lucene.analysis.tokenattributes;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.util.AttributeImpl;
+import org.apache.lucene.util.AttributeReflector;
+import org.apache.lucene.util.BytesRef;
+
+/** Implementation class for {@link BytesTermAttribute}.
+ * @lucene.internal
+ */
+public class BytesTermAttributeImpl extends AttributeImpl implements BytesTermAttribute, TermToBytesRefAttribute {
+  private BytesRef bytes;
+
+  /** Initialize this attribute with no bytes. */
+  public BytesTermAttributeImpl() {}
+
+  @Override
+  public BytesRef getBytesRef() {
+    return bytes;
+  }
+
+  @Override
+  public void setBytesRef(BytesRef bytes) {
+    this.bytes = bytes;
+  }
+
+  @Override
+  public void clear() {
+    this.bytes = null;
+  }
+
+  @Override
+  public void copyTo(AttributeImpl target) {
+    BytesTermAttributeImpl other = (BytesTermAttributeImpl) target;
+    other.bytes = BytesRef.deepCopyOf(bytes);
+  }
+
+  @Override
+  public AttributeImpl clone() {
+    BytesTermAttributeImpl c = (BytesTermAttributeImpl)super.clone();
+    copyTo(c);
+    return c;
+  }
+
+  @Override
+  public void reflectWith(AttributeReflector reflector) {
+    reflector.reflect(TermToBytesRefAttribute.class, "bytes", bytes);
+  }
+}
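For illustration, a producer-side sketch (a hypothetical class, not part of this commit) showing how a single-token binary stream looks with the new attribute; the rewritten BinaryTokenStream in Field.java below follows the same pattern:

    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.tokenattributes.BytesTermAttribute;
    import org.apache.lucene.util.BytesRef;

    // Emits exactly one token whose term bytes are the given BytesRef.
    final class SingleBinaryTokenStream extends TokenStream {
      private final BytesTermAttribute bytesAtt = addAttribute(BytesTermAttribute.class);
      private final BytesRef value;
      private boolean done;

      SingleBinaryTokenStream(BytesRef value) {
        this.value = value;
      }

      @Override
      public boolean incrementToken() {
        if (done) return false;
        clearAttributes();               // clear() nulls the attribute's bytes...
        bytesAtt.setBytesRef(value);     // ...so set them afterwards; getBytesRef() returns exactly these bytes
        done = true;
        return true;
      }

      @Override
      public void reset() {
        done = false;
      }
    }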


@@ -33,6 +33,9 @@ public class CharTermAttributeImpl extends AttributeImpl implements CharTermAttr
   private char[] termBuffer = new char[ArrayUtil.oversize(MIN_BUFFER_SIZE, RamUsageEstimator.NUM_BYTES_CHAR)];
   private int termLength = 0;
 
+  /** May be used by subclasses to convert to different charsets / encodings for implementing {@link #getBytesRef()}. */
+  protected BytesRefBuilder builder = new BytesRefBuilder();
+
   /** Initialize this attribute with empty term text */
   public CharTermAttributeImpl() {}
@@ -83,16 +86,11 @@ public class CharTermAttributeImpl extends AttributeImpl implements CharTermAttr
   }
 
   // *** TermToBytesRefAttribute interface ***
-  private BytesRefBuilder bytes = new BytesRefBuilder();
-
-  @Override
-  public void fillBytesRef() {
-    bytes.copyChars(termBuffer, 0, termLength);
-  }
 
   @Override
   public BytesRef getBytesRef() {
-    return bytes.get();
+    builder.copyChars(termBuffer, 0, termLength);
+    return builder.get();
   }
 
   // *** CharSequence interface ***
@@ -228,8 +226,8 @@ public class CharTermAttributeImpl extends AttributeImpl implements CharTermAttr
     // Do a deep clone
     t.termBuffer = new char[this.termLength];
     System.arraycopy(this.termBuffer, 0, t.termBuffer, 0, this.termLength);
-    t.bytes = new BytesRefBuilder();
-    t.bytes.copyBytes(bytes.get());
+    t.builder = new BytesRefBuilder();
+    t.builder.copyBytes(builder.get());
     return t;
   }
@@ -271,8 +269,7 @@ public class CharTermAttributeImpl extends AttributeImpl implements CharTermAttr
   @Override
   public void reflectWith(AttributeReflector reflector) {
     reflector.reflect(CharTermAttribute.class, "term", toString());
-    fillBytesRef();
-    reflector.reflect(TermToBytesRefAttribute.class, "bytes", bytes.toBytesRef());
+    reflector.reflect(TermToBytesRefAttribute.class, "bytes", getBytesRef());
   }
 
   @Override


@@ -24,17 +24,12 @@ import org.apache.lucene.util.BytesRef;
  * This attribute is requested by TermsHashPerField to index the contents.
  * This attribute can be used to customize the final byte[] encoding of terms.
  * <p>
- * Consumers of this attribute call {@link #getBytesRef()} up-front, and then
- * invoke {@link #fillBytesRef()} for each term. Example:
+ * Consumers of this attribute call {@link #getBytesRef()} for each term. Example:
  * <pre class="prettyprint">
  *   final TermToBytesRefAttribute termAtt = tokenStream.getAttribute(TermToBytesRefAttribute.class);
- *   final BytesRef bytes = termAtt.getBytesRef();
  *
  *   while (tokenStream.incrementToken()) {
- *
- *     // you must call termAtt.fillBytesRef() before doing something with the bytes.
- *     // this encodes the term value (internally it might be a char[], etc) into the bytes.
- *     int hashCode = termAtt.fillBytesRef();
+ *     final BytesRef bytes = termAtt.getBytesRef();
  *
  *     if (isInteresting(bytes)) {
  *
@@ -42,27 +37,21 @@ import org.apache.lucene.util.BytesRef;
  *       // you should make a copy if you need persistent access to the bytes, otherwise they will
  *       // be rewritten across calls to incrementToken()
  *
- *       doSomethingWith(new BytesRef(bytes));
+ *       doSomethingWith(BytesRef.deepCopyOf(bytes));
  *     }
  *   }
  *   ...
  * </pre>
- * @lucene.experimental This is a very expert API, please use
- * {@link CharTermAttributeImpl} and its implementation of this method
- * for UTF-8 terms.
+ * @lucene.internal This is a very expert and internal API, please use
+ * {@link CharTermAttribute} and its implementation for UTF-8 terms; to
+ * index binary terms, use {@link BytesTermAttribute} and its implementation.
  */
 public interface TermToBytesRefAttribute extends Attribute {
-  /**
-   * Updates the bytes {@link #getBytesRef()} to contain this term's
-   * final encoding.
-   */
-  public void fillBytesRef();
-
   /**
-   * Retrieve this attribute's BytesRef. The bytes are updated
-   * from the current term when the consumer calls {@link #fillBytesRef()}.
-   * @return this Attributes internal BytesRef.
+   * Retrieve this attribute's BytesRef. The bytes are updated from the current term.
+   * The implementation may return a new instance or keep the previous one.
+   * @return a BytesRef to be indexed (only stays valid until the token stream is incremented)
    */
   public BytesRef getBytesRef();
 }


@@ -23,17 +23,14 @@ import java.io.Reader;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.NumericTokenStream;
 import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.BytesTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
 import org.apache.lucene.document.FieldType.NumericType;
-import org.apache.lucene.index.FieldInvertState; // javadocs
 import org.apache.lucene.index.IndexOptions;
-import org.apache.lucene.index.IndexWriter; // javadocs
 import org.apache.lucene.index.IndexableField;
 import org.apache.lucene.index.IndexableFieldType;
 import org.apache.lucene.index.StorableField;
-import org.apache.lucene.util.AttributeImpl;
 import org.apache.lucene.util.BytesRef;
 
@@ -570,71 +567,47 @@ public class Field implements IndexableField, StorableField {
   }
 
   private static final class BinaryTokenStream extends TokenStream {
-    private final ByteTermAttribute bytesAtt = addAttribute(ByteTermAttribute.class);
+    private final BytesTermAttribute bytesAtt = addAttribute(BytesTermAttribute.class);
+    private boolean used = true;
+    private BytesRef value;
 
-    // Do not init this to true, becase caller must first call reset:
-    private boolean available;
-
-    public BinaryTokenStream() {
+    /** Creates a new TokenStream that returns a BytesRef as single token.
+     * <p>Warning: Does not initialize the value, you must call
+     * {@link #setValue(BytesRef)} afterwards!
+     */
+    BinaryTokenStream() {
     }
 
     public void setValue(BytesRef value) {
-      bytesAtt.setBytesRef(value);
+      this.value = value;
     }
 
     @Override
     public boolean incrementToken() {
-      if (available) {
-        clearAttributes();
-        available = false;
-        return true;
+      if (used) {
+        return false;
       }
-      return false;
+      clearAttributes();
+      bytesAtt.setBytesRef(value);
+      used = true;
+      return true;
     }
 
     @Override
     public void reset() {
-      available = true;
+      used = false;
     }
 
-    public interface ByteTermAttribute extends TermToBytesRefAttribute {
-      public void setBytesRef(BytesRef bytes);
-    }
-
-    public static class ByteTermAttributeImpl extends AttributeImpl implements ByteTermAttribute, TermToBytesRefAttribute {
-      private BytesRef bytes;
-
-      @Override
-      public void fillBytesRef() {
-        // no-op: the bytes was already filled by our owner's incrementToken
-      }
-
-      @Override
-      public BytesRef getBytesRef() {
-        return bytes;
-      }
-
-      @Override
-      public void setBytesRef(BytesRef bytes) {
-        this.bytes = bytes;
-      }
-
-      @Override
-      public void clear() {
-      }
-
-      @Override
-      public void copyTo(AttributeImpl target) {
-        ByteTermAttributeImpl other = (ByteTermAttributeImpl) target;
-        other.bytes = bytes;
-      }
+    @Override
+    public void close() {
+      value = null;
     }
   }
 
   private static final class StringTokenStream extends TokenStream {
     private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class);
     private final OffsetAttribute offsetAttribute = addAttribute(OffsetAttribute.class);
-    private boolean used = false;
+    private boolean used = true;
     private String value = null;
 
     /** Creates a new TokenStream that returns a String as single token.


@@ -36,7 +36,6 @@ abstract class TermsHashPerField implements Comparable<TermsHashPerField> {
   protected final DocumentsWriterPerThread.DocState docState;
   protected final FieldInvertState fieldState;
   TermToBytesRefAttribute termAtt;
-  BytesRef termBytesRef;
 
   // Copied from our perThread
   final IntBlockPool intPool;
@@ -145,13 +144,10 @@ abstract class TermsHashPerField implements Comparable<TermsHashPerField> {
    * entry point (for first TermsHash); postings use this
    * API. */
   void add() throws IOException {
-    termAtt.fillBytesRef();
-
     // We are first in the chain so we must "intern" the
     // term text into textStart address
     // Get the text & hash of this term.
-    int termID = bytesHash.add(termBytesRef);
+    int termID = bytesHash.add(termAtt.getBytesRef());
 
     //System.out.println("add term=" + termBytesRef.utf8ToString() + " doc=" + docState.docID + " termID=" + termID);
@@ -292,10 +288,6 @@ abstract class TermsHashPerField implements Comparable<TermsHashPerField> {
    * document. */
   boolean start(IndexableField field, boolean first) {
     termAtt = fieldState.termAttribute;
-    // EmptyTokenStream can have null term att
-    if (termAtt != null) {
-      termBytesRef = termAtt.getBytesRef();
-    }
     if (nextPerField != null) {
       doNextCall = nextPerField.start(field, first);
     }


@@ -267,15 +267,13 @@ public class QueryBuilder {
    */
   private Query analyzeTerm(String field, TokenStream stream) throws IOException {
     TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
-    BytesRef bytes = termAtt.getBytesRef();
 
     stream.reset();
     if (!stream.incrementToken()) {
       throw new AssertionError();
     }
-    termAtt.fillBytesRef();
-    return newTermQuery(new Term(field, BytesRef.deepCopyOf(bytes)));
+    return newTermQuery(new Term(field, BytesRef.deepCopyOf(termAtt.getBytesRef())));
   }
 
   /**
@@ -286,12 +284,10 @@ public class QueryBuilder {
     q.setDisableCoord(true);
     TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
-    BytesRef bytes = termAtt.getBytesRef();
 
     stream.reset();
     while (stream.incrementToken()) {
-      termAtt.fillBytesRef();
-      Query currentQuery = newTermQuery(new Term(field, BytesRef.deepCopyOf(bytes)));
+      Query currentQuery = newTermQuery(new Term(field, BytesRef.deepCopyOf(termAtt.getBytesRef())));
       q.add(currentQuery, BooleanClause.Occur.SHOULD);
     }
 
@@ -317,18 +313,15 @@ public class QueryBuilder {
     BooleanQuery.Builder currentQuery = newBooleanQuery(true);
 
     TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
-    BytesRef bytes = termAtt.getBytesRef();
     PositionIncrementAttribute posIncrAtt = stream.getAttribute(PositionIncrementAttribute.class);
 
     stream.reset();
     while (stream.incrementToken()) {
-      termAtt.fillBytesRef();
-
       if (posIncrAtt.getPositionIncrement() != 0) {
         add(q, currentQuery.build(), operator);
         currentQuery = newBooleanQuery(true);
       }
-      currentQuery.add(newTermQuery(new Term(field, BytesRef.deepCopyOf(bytes))), BooleanClause.Occur.SHOULD);
+      currentQuery.add(newTermQuery(new Term(field, BytesRef.deepCopyOf(termAtt.getBytesRef()))), BooleanClause.Occur.SHOULD);
     }
     add(q, currentQuery.build(), operator);
@@ -343,21 +336,17 @@ public class QueryBuilder {
     builder.setSlop(slop);
 
     TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
-    BytesRef bytes = termAtt.getBytesRef();
     PositionIncrementAttribute posIncrAtt = stream.getAttribute(PositionIncrementAttribute.class);
     int position = -1;
 
     stream.reset();
     while (stream.incrementToken()) {
-      termAtt.fillBytesRef();
-
       if (enablePositionIncrements) {
         position += posIncrAtt.getPositionIncrement();
       } else {
         position += 1;
       }
-      builder.add(new Term(field, bytes), position);
+      builder.add(new Term(field, termAtt.getBytesRef()), position);
     }
     return builder.build();
@@ -371,7 +360,6 @@ public class QueryBuilder {
     mpq.setSlop(slop);
 
     TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
-    BytesRef bytes = termAtt.getBytesRef();
     PositionIncrementAttribute posIncrAtt = stream.getAttribute(PositionIncrementAttribute.class);
 
     int position = -1;
@@ -379,7 +367,6 @@ public class QueryBuilder {
     List<Term> multiTerms = new ArrayList<>();
     stream.reset();
     while (stream.incrementToken()) {
-      termAtt.fillBytesRef();
       int positionIncrement = posIncrAtt.getPositionIncrement();
 
       if (positionIncrement > 0 && multiTerms.size() > 0) {
@@ -391,7 +378,7 @@ public class QueryBuilder {
         multiTerms.clear();
       }
       position += positionIncrement;
-      multiTerms.add(new Term(field, BytesRef.deepCopyOf(bytes)));
+      multiTerms.add(new Term(field, BytesRef.deepCopyOf(termAtt.getBytesRef())));
     }
 
     if (enablePositionIncrements) {


@@ -17,7 +17,6 @@ package org.apache.lucene.analysis;
  * limitations under the License.
  */
 
-import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.NumericUtils;
 import org.apache.lucene.analysis.NumericTokenStream.NumericTermAttributeImpl;
 import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
@@ -40,14 +39,12 @@ public class TestNumericTokenStream extends BaseTokenStreamTestCase {
     assertNotNull(typeAtt);
     final NumericTokenStream.NumericTermAttribute numericAtt = stream.getAttribute(NumericTokenStream.NumericTermAttribute.class);
     assertNotNull(numericAtt);
-    final BytesRef bytes = bytesAtt.getBytesRef();
     stream.reset();
     assertEquals(64, numericAtt.getValueSize());
     for (int shift=0; shift<64; shift+=NumericUtils.PRECISION_STEP_DEFAULT) {
       assertTrue("New token is available", stream.incrementToken());
       assertEquals("Shift value wrong", shift, numericAtt.getShift());
-      bytesAtt.fillBytesRef();
-      assertEquals("Term is incorrectly encoded", lvalue & ~((1L << shift) - 1L), NumericUtils.prefixCodedToLong(bytes));
+      assertEquals("Term is incorrectly encoded", lvalue & ~((1L << shift) - 1L), NumericUtils.prefixCodedToLong(bytesAtt.getBytesRef()));
       assertEquals("Term raw value is incorrectly encoded", lvalue & ~((1L << shift) - 1L), numericAtt.getRawValue());
       assertEquals("Type incorrect", (shift == 0) ? NumericTokenStream.TOKEN_TYPE_FULL_PREC : NumericTokenStream.TOKEN_TYPE_LOWER_PREC, typeAtt.type());
     }
@@ -65,14 +62,12 @@ public class TestNumericTokenStream extends BaseTokenStreamTestCase {
     assertNotNull(typeAtt);
     final NumericTokenStream.NumericTermAttribute numericAtt = stream.getAttribute(NumericTokenStream.NumericTermAttribute.class);
     assertNotNull(numericAtt);
-    final BytesRef bytes = bytesAtt.getBytesRef();
     stream.reset();
     assertEquals(32, numericAtt.getValueSize());
     for (int shift=0; shift<32; shift+=NumericUtils.PRECISION_STEP_DEFAULT) {
       assertTrue("New token is available", stream.incrementToken());
       assertEquals("Shift value wrong", shift, numericAtt.getShift());
-      bytesAtt.fillBytesRef();
-      assertEquals("Term is incorrectly encoded", ivalue & ~((1 << shift) - 1), NumericUtils.prefixCodedToInt(bytes));
+      assertEquals("Term is incorrectly encoded", ivalue & ~((1 << shift) - 1), NumericUtils.prefixCodedToInt(bytesAtt.getBytesRef()));
       assertEquals("Term raw value is incorrectly encoded", ((long) ivalue) & ~((1L << shift) - 1L), numericAtt.getRawValue());
       assertEquals("Type incorrect", (shift == 0) ? NumericTokenStream.TOKEN_TYPE_FULL_PREC : NumericTokenStream.TOKEN_TYPE_LOWER_PREC, typeAtt.type());
     }
@@ -123,6 +118,7 @@ public class TestNumericTokenStream extends BaseTokenStreamTestCase {
   public void testAttributeClone() throws Exception {
     NumericTermAttributeImpl att = new NumericTermAttributeImpl();
+    att.init(1234L, 64, 8, 0); // set some value, to make getBytesRef() work
     NumericTermAttributeImpl copy = TestCharTermAttributeImpl.assertCloneIsEqual(att);
     assertNotSame(att.getBytesRef(), copy.getBytesRef());
     NumericTermAttributeImpl copy2 = TestCharTermAttributeImpl.assertCopyIsEqual(att);


@@ -1,86 +0,0 @@
-package org.apache.lucene.index;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
-import org.apache.lucene.util.AttributeImpl;
-import org.apache.lucene.util.BytesRef;
-
-import org.apache.lucene.analysis.CannedBinaryTokenStream; // javadocs
-
-/**
- * A binary tokenstream that lets you index a single
- * binary token (BytesRef value).
- *
- * @see CannedBinaryTokenStream
- */
-public final class BinaryTokenStream extends TokenStream {
-  private final ByteTermAttribute bytesAtt = addAttribute(ByteTermAttribute.class);
-  private boolean available = true;
-
-  public BinaryTokenStream(BytesRef bytes) {
-    bytesAtt.setBytesRef(bytes);
-  }
-
-  @Override
-  public boolean incrementToken() {
-    if (available) {
-      clearAttributes();
-      available = false;
-      return true;
-    }
-    return false;
-  }
-
-  @Override
-  public void reset() {
-    available = true;
-  }
-
-  public interface ByteTermAttribute extends TermToBytesRefAttribute {
-    public void setBytesRef(BytesRef bytes);
-  }
-
-  public static class ByteTermAttributeImpl extends AttributeImpl implements ByteTermAttribute,TermToBytesRefAttribute {
-    private BytesRef bytes;
-
-    @Override
-    public void fillBytesRef() {
-      // no-op: the bytes was already filled by our owner's incrementToken
-    }
-
-    @Override
-    public BytesRef getBytesRef() {
-      return bytes;
-    }
-
-    @Override
-    public void setBytesRef(BytesRef bytes) {
-      this.bytes = bytes;
-    }
-
-    @Override
-    public void clear() {}
-
-    @Override
-    public void copyTo(AttributeImpl target) {
-      ByteTermAttributeImpl other = (ByteTermAttributeImpl) target;
-      other.bytes = bytes;
-    }
-  }
-}


@@ -108,11 +108,6 @@ public class Test2BTerms extends LuceneTestCase {
   }
 
   private final static class MyTermAttributeImpl extends AttributeImpl implements TermToBytesRefAttribute {
-    @Override
-    public void fillBytesRef() {
-      // no-op: the bytes was already filled by our owner's incrementToken
-    }
-
     @Override
     public BytesRef getBytesRef() {
       return bytes;
@@ -122,18 +117,9 @@ public class Test2BTerms extends LuceneTestCase {
     public void clear() {
     }
 
-    @Override
-    public boolean equals(Object other) {
-      return other == this;
-    }
-
-    @Override
-    public int hashCode() {
-      return System.identityHashCode(this);
-    }
-
     @Override
     public void copyTo(AttributeImpl target) {
       throw new UnsupportedOperationException();
     }
 
     @Override


@@ -22,7 +22,6 @@ import java.io.IOException;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
-import org.apache.lucene.document.TextField;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.TopDocs;
@@ -38,7 +37,6 @@ public class TestBinaryTerms extends LuceneTestCase {
     Directory dir = newDirectory();
     RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
     BytesRef bytes = new BytesRef(2);
-    BinaryTokenStream tokenStream = new BinaryTokenStream(bytes);
 
     for (int i = 0; i < 256; i++) {
       bytes.bytes[0] = (byte) i;
@@ -47,8 +45,8 @@ public class TestBinaryTerms extends LuceneTestCase {
       Document doc = new Document();
       FieldType customType = new FieldType();
       customType.setStored(true);
-      doc.add(new Field("id", "" + i, customType));
-      doc.add(new TextField("bytes", tokenStream));
+      doc.add(newField("id", "" + i, customType));
+      doc.add(newStringField("bytes", bytes, Field.Store.NO));
       iw.addDocument(doc);
     }


@@ -49,14 +49,13 @@ public class TestLongPostings extends LuceneTestCase {
       }
 
       try (TokenStream ts = a.tokenStream("foo", s)) {
         final TermToBytesRefAttribute termAtt = ts.getAttribute(TermToBytesRefAttribute.class);
-        final BytesRef termBytes = termAtt.getBytesRef();
         ts.reset();
 
         int count = 0;
         boolean changed = false;
 
         while(ts.incrementToken()) {
-          termAtt.fillBytesRef();
+          final BytesRef termBytes = termAtt.getBytesRef();
           if (count == 0 && !termBytes.utf8ToString().equals(s)) {
             // The value was changed during analysis.  Keep iterating so the
             // tokenStream is exhausted.


@@ -23,17 +23,12 @@ import java.util.HashSet;
 import java.util.List;
 import java.util.Set;
 
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
-import org.apache.lucene.document.FieldType;
-import org.apache.lucene.document.StringField;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.store.Directory;
-import org.apache.lucene.util.AttributeImpl;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.StringHelper;
@@ -94,78 +89,6 @@ public class TestPrefixQuery extends LuceneTestCase {
     directory.close();
   }
 
-  static final class BinaryTokenStream extends TokenStream {
-    private final ByteTermAttribute bytesAtt = addAttribute(ByteTermAttribute.class);
-    private boolean available = true;
-
-    public BinaryTokenStream(BytesRef bytes) {
-      bytesAtt.setBytesRef(bytes);
-    }
-
-    @Override
-    public boolean incrementToken() {
-      if (available) {
-        clearAttributes();
-        available = false;
-        return true;
-      }
-      return false;
-    }
-
-    @Override
-    public void reset() {
-      available = true;
-    }
-
-    public interface ByteTermAttribute extends TermToBytesRefAttribute {
-      public void setBytesRef(BytesRef bytes);
-    }
-
-    public static class ByteTermAttributeImpl extends AttributeImpl implements ByteTermAttribute,TermToBytesRefAttribute {
-      private BytesRef bytes;
-
-      @Override
-      public void fillBytesRef() {
-        // no-op: the bytes was already filled by our owner's incrementToken
-      }
-
-      @Override
-      public BytesRef getBytesRef() {
-        return bytes;
-      }
-
-      @Override
-      public void setBytesRef(BytesRef bytes) {
-        this.bytes = bytes;
-      }
-
-      @Override
-      public void clear() {}
-
-      @Override
-      public void copyTo(AttributeImpl target) {
-        ByteTermAttributeImpl other = (ByteTermAttributeImpl) target;
-        other.bytes = bytes;
-      }
-    }
-  }
-
-  /** Basically a StringField that accepts binary term. */
-  private static class BinaryField extends Field {
-    final static FieldType TYPE;
-    static {
-      TYPE = new FieldType(StringField.TYPE_NOT_STORED);
-      // Necessary so our custom tokenStream is used by Field.tokenStream:
-      TYPE.setTokenized(true);
-      TYPE.freeze();
-    }
-
-    public BinaryField(String name, BytesRef value) {
-      super(name, new BinaryTokenStream(value), TYPE);
-    }
-  }
-
   public void testRandomBinaryPrefix() throws Exception {
     Directory dir = newDirectory();
     RandomIndexWriter w = new RandomIndexWriter(random(), dir);
@@ -182,7 +105,7 @@ public class TestPrefixQuery extends LuceneTestCase {
     Collections.shuffle(termsList, random());
     for(BytesRef term : termsList) {
       Document doc = new Document();
-      doc.add(new BinaryField("field", term));
+      doc.add(newStringField("field", term, Field.Store.NO));
       w.addDocument(doc);
     }


@@ -167,14 +167,11 @@ public abstract class AbstractTestCase extends LuceneTestCase {
     try (TokenStream tokenStream = analyzer.tokenStream(field, text)) {
       TermToBytesRefAttribute termAttribute = tokenStream.getAttribute(TermToBytesRefAttribute.class);
-      BytesRef bytesRef = termAttribute.getBytesRef();
-
       tokenStream.reset();
 
       while (tokenStream.incrementToken()) {
-        termAttribute.fillBytesRef();
-        bytesRefs.add(BytesRef.deepCopyOf(bytesRef));
+        bytesRefs.add(BytesRef.deepCopyOf(termAttribute.getBytesRef()));
       }
 
       tokenStream.end();


@@ -453,18 +453,16 @@ public class MemoryIndex {
       PositionIncrementAttribute posIncrAttribute = stream.addAttribute(PositionIncrementAttribute.class);
       OffsetAttribute offsetAtt = stream.addAttribute(OffsetAttribute.class);
       PayloadAttribute payloadAtt = storePayloads ? stream.addAttribute(PayloadAttribute.class) : null;
-      BytesRef ref = termAtt.getBytesRef();
       stream.reset();
 
       while (stream.incrementToken()) {
-        termAtt.fillBytesRef();
//        if (DEBUG) System.err.println("token='" + term + "'");
         numTokens++;
         final int posIncr = posIncrAttribute.getPositionIncrement();
         if (posIncr == 0)
           numOverlapTokens++;
         pos += posIncr;
-        int ord = terms.add(ref);
+        int ord = terms.add(termAtt.getBytesRef());
         if (ord < 0) {
           ord = (-ord) - 1;
           postingsWriter.reset(sliceArray.end[ord]);


@@ -612,15 +612,14 @@ public abstract class QueryParserBase extends QueryBuilder implements CommonQuer
       source.reset();
 
       TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class);
-      BytesRef bytes = termAtt.getBytesRef();
 
       if (!source.incrementToken())
         throw new IllegalArgumentException("analyzer returned no terms for multiTerm term: " + part);
-      termAtt.fillBytesRef();
+      BytesRef bytes = BytesRef.deepCopyOf(termAtt.getBytesRef());
       if (source.incrementToken())
         throw new IllegalArgumentException("analyzer returned too many terms for multiTerm term: " + part);
       source.end();
-      return BytesRef.deepCopyOf(bytes);
+      return bytes;
     } catch (IOException e) {
       throw new RuntimeException("Error analyzing multiTerm term: " + part, e);
     }


@@ -53,11 +53,9 @@ public class SpanOrTermsBuilder extends SpanBuilderBase {
     try (TokenStream ts = analyzer.tokenStream(fieldName, value)) {
       TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
-      BytesRef bytes = termAtt.getBytesRef();
       ts.reset();
       while (ts.incrementToken()) {
-        termAtt.fillBytesRef();
-        SpanTermQuery stq = new SpanTermQuery(new Term(fieldName, BytesRef.deepCopyOf(bytes)));
+        SpanTermQuery stq = new SpanTermQuery(new Term(fieldName, BytesRef.deepCopyOf(termAtt.getBytesRef())));
         clausesList.add(stq);
       }
       ts.end();


@@ -55,11 +55,9 @@ public class TermsQueryBuilder implements QueryBuilder {
     try (TokenStream ts = analyzer.tokenStream(fieldName, text)) {
       TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
       Term term = null;
-      BytesRef bytes = termAtt.getBytesRef();
       ts.reset();
       while (ts.incrementToken()) {
-        termAtt.fillBytesRef();
-        term = new Term(fieldName, BytesRef.deepCopyOf(bytes));
+        term = new Term(fieldName, BytesRef.deepCopyOf(termAtt.getBytesRef()));
         bq.add(new BooleanClause(new TermQuery(term), BooleanClause.Occur.SHOULD));
       }
       ts.end();


@@ -61,8 +61,6 @@ public class TokenStreamToTermAutomatonQuery {
     final PositionLengthAttribute posLengthAtt = in.addAttribute(PositionLengthAttribute.class);
     final OffsetAttribute offsetAtt = in.addAttribute(OffsetAttribute.class);
-    final BytesRef term = termBytesAtt.getBytesRef();
-
     in.reset();
 
     TermAutomatonQuery query = new TermAutomatonQuery(field);
@@ -93,7 +91,7 @@ public class TokenStreamToTermAutomatonQuery {
         state = query.createState();
       }
 
-      termBytesAtt.fillBytesRef();
+      BytesRef term = termBytesAtt.getBytesRef();
       //System.out.println(pos + "-" + endPos + ": " + term.utf8ToString() + ": posInc=" + posInc);
       if (term.length == 1 && term.bytes[term.offset] == (byte) '*') {
         query.addAnyTransition(pos, endPos);


@@ -20,11 +20,7 @@ package org.apache.lucene.spatial.prefix;
 import java.io.IOException;
 
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
-import org.apache.lucene.util.Attribute;
-import org.apache.lucene.util.AttributeFactory;
-import org.apache.lucene.util.AttributeImpl;
+import org.apache.lucene.analysis.tokenattributes.BytesTermAttribute;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefIterator;
@@ -37,85 +33,6 @@ import org.apache.lucene.util.BytesRefIterator;
  */
 class BytesRefIteratorTokenStream extends TokenStream {
 
-  // just a wrapper to prevent adding CharTermAttribute
-  private static final class BRAttributeFactory extends AttributeFactory {
-    private final AttributeFactory delegate;
-
-    BRAttributeFactory(AttributeFactory delegate) {
-      this.delegate = delegate;
-    }
-
-    @Override
-    public AttributeImpl createAttributeInstance(Class<? extends Attribute> attClass) {
-      if (CharTermAttribute.class.isAssignableFrom(attClass))
-        throw new IllegalArgumentException(getClass() + " does not support CharTermAttribute.");
-      return delegate.createAttributeInstance(attClass);
-    }
-  }
-
-  private static final class BRTermToBytesRefAttributeImpl extends AttributeImpl
-      implements TermToBytesRefAttribute {
-    private final BytesRef bytes = new BytesRef();
-
-    void setBytesRef(BytesRef inputBytes) {
-      // shallow clone. this.bytesRef is final
-      bytes.bytes = inputBytes.bytes;
-      bytes.offset = inputBytes.offset;
-      bytes.length = inputBytes.length;
-    }
-
-    @Override
-    public void clear() {
-      // we keep it untouched as it's fully controlled by the outer class.
-    }
-
-    @Override
-    public void copyTo(AttributeImpl target) {
-      final BRTermToBytesRefAttributeImpl a = (BRTermToBytesRefAttributeImpl) target;
-      a.setBytesRef(BytesRef.deepCopyOf(bytes));
-    }
-
-    @Override
-    public void fillBytesRef() {
-      //nothing to do; it's populated by incrementToken
-    }
-
-    @Override
-    public BytesRef getBytesRef() {
-      return bytes;
-    }
-
-    @Override
-    public BRTermToBytesRefAttributeImpl clone() {
-      // super.clone won't work since we need a new BytesRef reference and it's nice to have it final. The superclass
-      // has no state to copy anyway.
-      final BRTermToBytesRefAttributeImpl clone = new BRTermToBytesRefAttributeImpl();
-      clone.setBytesRef(BytesRef.deepCopyOf(bytes));
-      return clone;
-    }
-
-    @Override
-    public int hashCode() {
-      return bytes.hashCode();
-    }
-
-    @Override
-    public boolean equals(Object obj) {
-      if (this == obj) return true;
-      if (obj == null) return false;
-      if (getClass() != obj.getClass()) return false;
-      BRTermToBytesRefAttributeImpl other = (BRTermToBytesRefAttributeImpl) obj;
-      if (!bytes.equals(other.bytes)) return false;
-      return true;
-    }
-  }
-
-  public BytesRefIteratorTokenStream() {
-    super(new BRAttributeFactory(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY));
-    addAttributeImpl(new BRTermToBytesRefAttributeImpl());//because non-public constructor
-    bytesAtt = (BRTermToBytesRefAttributeImpl) addAttribute(TermToBytesRefAttribute.class);
-  }
-
   public BytesRefIterator getBytesRefIterator() {
     return bytesIter;
   }
@@ -129,7 +46,6 @@ class BytesRefIteratorTokenStream extends TokenStream {
   public void reset() throws IOException {
     if (bytesIter == null)
       throw new IllegalStateException("call setBytesRefIterator() before usage");
-    bytesAtt.getBytesRef().length = 0;
   }
 
   @Override
@@ -137,14 +53,12 @@ class BytesRefIteratorTokenStream extends TokenStream {
     if (bytesIter == null)
      throw new IllegalStateException("call setBytesRefIterator() before usage");
 
-    // this will only clear all other attributes in this TokenStream
-    clearAttributes();//TODO but there should be no "other" attributes
-
     // get next
     BytesRef bytes = bytesIter.next();
     if (bytes == null) {
      return false;
    } else {
+      clearAttributes();
       bytesAtt.setBytesRef(bytes);
       //note: we don't bother setting posInc or type attributes. There's no point to it.
       return true;
@@ -152,7 +66,7 @@ class BytesRefIteratorTokenStream extends TokenStream {
   }
 
   //members
-  private final BRTermToBytesRefAttributeImpl bytesAtt;
+  private final BytesTermAttribute bytesAtt = addAttribute(BytesTermAttribute.class);
   private BytesRefIterator bytesIter = null; // null means not initialized


@@ -486,11 +486,10 @@ public class FreeTextSuggester extends Lookup implements Accountable {
 
       // Run full analysis, but save only the
       // last 1gram, last 2gram, etc.:
-      BytesRef tokenBytes = termBytesAtt.getBytesRef();
       int maxEndOffset = -1;
       boolean sawRealToken = false;
       while(ts.incrementToken()) {
-        termBytesAtt.fillBytesRef();
+        BytesRef tokenBytes = termBytesAtt.getBytesRef();
         sawRealToken |= tokenBytes.length > 0;
         // TODO: this is somewhat iffy; today, ShingleFilter
         // sets posLen to the gram count; maybe we should make
@@ -58,7 +58,7 @@ public final class CompletionTokenStream extends TokenStream {
   private final PayloadAttribute payloadAttr = addAttribute(PayloadAttribute.class);
   private final PositionIncrementAttribute posAttr = addAttribute(PositionIncrementAttribute.class);
-  private final ByteTermAttribute bytesAtt = addAttribute(ByteTermAttribute.class);
+  private final BytesRefBuilderTermAttribute bytesAtt = addAttribute(BytesRefBuilderTermAttribute.class);

   private final TokenStream input;
   final boolean preserveSep;
@@ -309,9 +309,7 @@ public final class CompletionTokenStream extends TokenStream {
   /**
    * Attribute providing access to the term builder and UTF-16 conversion
    */
-  private interface ByteTermAttribute extends TermToBytesRefAttribute {
-    // marker interface
-
+  private interface BytesRefBuilderTermAttribute extends TermToBytesRefAttribute {
     /**
      * Returns the builder from which the term is derived.
      */
@@ -326,20 +324,15 @@ public final class CompletionTokenStream extends TokenStream {
   /**
    * Custom attribute implementation for completion token stream
    */
-  public static final class ByteTermAttributeImpl extends AttributeImpl implements ByteTermAttribute, TermToBytesRefAttribute {
+  public static final class BytesRefBuilderTermAttributeImpl extends AttributeImpl implements BytesRefBuilderTermAttribute, TermToBytesRefAttribute {
     private final BytesRefBuilder bytes = new BytesRefBuilder();
-    private CharsRefBuilder charsRef;
+    private transient CharsRefBuilder charsRef;

     /**
      * Sole constructor
      * no-op
      */
-    public ByteTermAttributeImpl() {
-    }
-
-    @Override
-    public void fillBytesRef() {
-      // does nothing - we change in place
+    public BytesRefBuilderTermAttributeImpl() {
     }

     @Override
@@ -359,10 +352,17 @@ public final class CompletionTokenStream extends TokenStream {
     @Override
     public void copyTo(AttributeImpl target) {
-      ByteTermAttributeImpl other = (ByteTermAttributeImpl) target;
+      BytesRefBuilderTermAttributeImpl other = (BytesRefBuilderTermAttributeImpl) target;
       other.bytes.copyBytes(bytes);
     }

+    @Override
+    public AttributeImpl clone() {
+      BytesRefBuilderTermAttributeImpl other = new BytesRefBuilderTermAttributeImpl();
+      copyTo(other);
+      return other;
+    }
+
     @Override
     public CharSequence toUTF16() {
       if (charsRef == null) {
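
The clone() added here follows the usual AttributeImpl idiom: allocate a fresh instance and route the state through copyTo(), so the final BytesRefBuilder field stays independent between clones instead of being shared by Object.clone(). A generic sketch of that idiom, for a hypothetical attribute implementation that is not part of this commit:

    import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
    import org.apache.lucene.util.AttributeImpl;
    import org.apache.lucene.util.BytesRef;
    import org.apache.lucene.util.BytesRefBuilder;

    // Hypothetical attribute holding its term bytes in a final BytesRefBuilder.
    public final class ExampleBytesTermAttributeImpl extends AttributeImpl
        implements TermToBytesRefAttribute {
      private final BytesRefBuilder bytes = new BytesRefBuilder();

      @Override
      public BytesRef getBytesRef() {
        return bytes.get();
      }

      @Override
      public void clear() {
        bytes.clear();
      }

      @Override
      public void copyTo(AttributeImpl target) {
        ((ExampleBytesTermAttributeImpl) target).bytes.copyBytes(bytes);
      }

      @Override
      public AttributeImpl clone() {
        // A fresh instance plus copyTo() copies the bytes; super.clone()
        // would share the builder between the original and the clone.
        ExampleBytesTermAttributeImpl other = new ExampleBytesTermAttributeImpl();
        copyTo(other);
        return other;
      }
    }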
@@ -51,7 +51,6 @@ import org.apache.lucene.document.Document;
 import org.apache.lucene.search.suggest.Lookup.LookupResult;
 import org.apache.lucene.search.suggest.Input;
 import org.apache.lucene.search.suggest.InputArrayIterator;
-import org.apache.lucene.util.AttributeFactory;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.LineFileDocs;
@@ -17,13 +17,11 @@ package org.apache.lucene.analysis;
  * limitations under the License.
  */

+import org.apache.lucene.analysis.tokenattributes.BytesTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
-import org.apache.lucene.util.AttributeImpl;
 import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.BytesRefBuilder;

 /**
  * TokenStream from a canned list of binary (BytesRef-based)
@@ -54,65 +52,11 @@ public final class CannedBinaryTokenStream extends TokenStream {
   private final BinaryToken[] tokens;
   private int upto = 0;
-  private final BinaryTermAttribute termAtt = addAttribute(BinaryTermAttribute.class);
+  private final BytesTermAttribute termAtt = addAttribute(BytesTermAttribute.class);
   private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
   private final PositionLengthAttribute posLengthAtt = addAttribute(PositionLengthAttribute.class);
   private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);

-  /** An attribute extending {@link
-   *  TermToBytesRefAttribute} but exposing {@link
-   *  #setBytesRef} method. */
-  public interface BinaryTermAttribute extends TermToBytesRefAttribute {
-    /** Set the current binary value. */
-    public void setBytesRef(BytesRef bytes);
-  }
-
-  /** Implementation for {@link BinaryTermAttribute}. */
-  public final static class BinaryTermAttributeImpl extends AttributeImpl implements BinaryTermAttribute, TermToBytesRefAttribute {
-    private final BytesRefBuilder bytes = new BytesRefBuilder();
-
-    @Override
-    public void fillBytesRef() {
-      bytes.get(); // sets the length on the bytesref
-    }
-
-    @Override
-    public BytesRef getBytesRef() {
-      return bytes.get();
-    }
-
-    @Override
-    public void setBytesRef(BytesRef bytes) {
-      this.bytes.copyBytes(bytes);
-    }
-
-    @Override
-    public void clear() {
-    }
-
-    @Override
-    public boolean equals(Object other) {
-      return other == this;
-    }
-
-    @Override
-    public int hashCode() {
-      return System.identityHashCode(this);
-    }
-
-    @Override
-    public void copyTo(AttributeImpl target) {
-      BinaryTermAttributeImpl other = (BinaryTermAttributeImpl) target;
-      other.bytes.copyBytes(bytes);
-    }
-
-    @Override
-    public BinaryTermAttributeImpl clone() {
-      throw new UnsupportedOperationException();
-    }
-  }
-
   public CannedBinaryTokenStream(BinaryToken... tokens) {
     super();
     this.tokens = tokens;
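
With the test-only BinaryTermAttribute removed, streams that emit pre-built binary terms can use the new general-purpose BytesTermAttribute instead. A rough sketch of such a stream, simplified from the class above (no offset, position, or payload handling):

    import java.io.IOException;

    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.tokenattributes.BytesTermAttribute;
    import org.apache.lucene.util.BytesRef;

    // Emits a fixed sequence of binary terms via BytesTermAttribute.
    final class SimpleBinaryTokenStream extends TokenStream {
      private final BytesTermAttribute termAtt = addAttribute(BytesTermAttribute.class);
      private final BytesRef[] terms;
      private int upto = 0;

      SimpleBinaryTokenStream(BytesRef... terms) {
        this.terms = terms;
      }

      @Override
      public boolean incrementToken() throws IOException {
        if (upto == terms.length) {
          return false;
        }
        clearAttributes();
        termAtt.setBytesRef(terms[upto++]);
        return true;
      }

      @Override
      public void reset() throws IOException {
        super.reset();
        upto = 0;
      }
    }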
@@ -183,12 +183,10 @@ public abstract class CollationTestBase extends LuceneTestCase {
       String term = TestUtil.randomSimpleString(random());
       try (TokenStream ts = analyzer.tokenStream("fake", term)) {
         TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
-        BytesRef bytes = termAtt.getBytesRef();
         ts.reset();
         assertTrue(ts.incrementToken());
-        termAtt.fillBytesRef();
         // ensure we make a copy of the actual bytes too
-        map.put(term, BytesRef.deepCopyOf(bytes));
+        map.put(term, BytesRef.deepCopyOf(termAtt.getBytesRef()));
         assertFalse(ts.incrementToken());
         ts.end();
       }
@@ -205,11 +203,9 @@ public abstract class CollationTestBase extends LuceneTestCase {
       BytesRef expected = mapping.getValue();
       try (TokenStream ts = analyzer.tokenStream("fake", term)) {
         TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
-        BytesRef bytes = termAtt.getBytesRef();
         ts.reset();
         assertTrue(ts.incrementToken());
-        termAtt.fillBytesRef();
-        assertEquals(expected, bytes);
+        assertEquals(expected, termAtt.getBytesRef());
         assertFalse(ts.incrementToken());
         ts.end();
       }
@@ -34,11 +34,11 @@ public class MockUTF16TermAttributeImpl extends CharTermAttributeImpl {
       AttributeFactory.getStaticImplementation(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, MockUTF16TermAttributeImpl.class);

   @Override
-  public void fillBytesRef() {
-    BytesRef bytes = getBytesRef();
-    byte[] utf16 = toString().getBytes(StandardCharsets.UTF_16LE);
-    bytes.bytes = utf16;
-    bytes.offset = 0;
-    bytes.length = utf16.length;
+  public BytesRef getBytesRef() {
+    final BytesRef ref = this.builder.get();
+    ref.bytes = toString().getBytes(StandardCharsets.UTF_16LE);
+    ref.offset = 0;
+    ref.length = ref.bytes.length;
+    return ref;
   }
 }
@@ -252,16 +252,16 @@ public class ICUCollationField extends FieldType {
       source.reset();
       TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class);
-      BytesRef bytes = termAtt.getBytesRef();

       // we control the analyzer here: most errors are impossible
       if (!source.incrementToken())
         throw new IllegalArgumentException("analyzer returned no terms for text: " + text);
-      termAtt.fillBytesRef();
+      BytesRef bytes = BytesRef.deepCopyOf(termAtt.getBytesRef());
       assert !source.incrementToken();

       source.end();
-      return BytesRef.deepCopyOf(bytes);
+      return bytes;
     } catch (IOException e) {
       throw new RuntimeException("Unable to analyze text: " + text, e);
     }
@@ -148,13 +148,11 @@ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase {
     try (TokenStream tokenStream = analyzer.tokenStream("", query)){
       final Set<BytesRef> tokens = new HashSet<>();
       final TermToBytesRefAttribute bytesAtt = tokenStream.getAttribute(TermToBytesRefAttribute.class);
-      final BytesRef bytes = bytesAtt.getBytesRef();

       tokenStream.reset();

       while (tokenStream.incrementToken()) {
-        bytesAtt.fillBytesRef();
-        tokens.add(BytesRef.deepCopyOf(bytes));
+        tokens.add(BytesRef.deepCopyOf(bytesAtt.getBytesRef()));
       }

       tokenStream.end();
@@ -246,7 +244,6 @@ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase {
     final NamedList<Object> tokenNamedList = new SimpleOrderedMap<>();
     final TermToBytesRefAttribute termAtt = token.getAttribute(TermToBytesRefAttribute.class);
     BytesRef rawBytes = termAtt.getBytesRef();
-    termAtt.fillBytesRef();
     final String text = fieldType.indexedToReadable(rawBytes, new CharsRefBuilder()).toString();
     tokenNamedList.add("text", text);
@@ -224,16 +224,14 @@ public class CollationField extends FieldType {
     try (TokenStream source = analyzer.tokenStream(field, text)) {
       source.reset();
       TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class);
-      BytesRef bytes = termAtt.getBytesRef();

       // we control the analyzer here: most errors are impossible
       if (!source.incrementToken())
         throw new IllegalArgumentException("analyzer returned no terms for text: " + text);
-      termAtt.fillBytesRef();
+      BytesRef bytes = BytesRef.deepCopyOf(termAtt.getBytesRef());
       assert !source.incrementToken();

       source.end();
-      return BytesRef.deepCopyOf(bytes);
+      return bytes;
     } catch (IOException e) {
       throw new RuntimeException("Unable to analyze text: " + text, e);
     }
@@ -146,16 +146,15 @@ public class TextField extends FieldType {
       source.reset();
       TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class);
-      BytesRef bytes = termAtt.getBytesRef();

       if (!source.incrementToken())
         throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,"analyzer returned no terms for multiTerm term: " + part);
-      termAtt.fillBytesRef();
+      BytesRef bytes = BytesRef.deepCopyOf(termAtt.getBytesRef());
       if (source.incrementToken())
         throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,"analyzer returned too many terms for multiTerm term: " + part);

       source.end();
-      return BytesRef.deepCopyOf(bytes);
+      return bytes;
     } catch (IOException e) {
       throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,"error analyzing range part: " + part, e);
     }
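
The three field types above share one single-token analysis pattern, and the deep copy now has to happen while the token is still current, before the stream is advanced or ended. A condensed sketch of that pattern; the method name is illustrative and the exception handling is reduced to IllegalArgumentException:

    import java.io.IOException;

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
    import org.apache.lucene.util.BytesRef;

    // Analyzes text expected to yield exactly one term; returns a private copy.
    static BytesRef analyzeSingleTerm(Analyzer analyzer, String field, String text) throws IOException {
      try (TokenStream source = analyzer.tokenStream(field, text)) {
        source.reset();
        TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class);
        if (!source.incrementToken())
          throw new IllegalArgumentException("analyzer returned no terms for text: " + text);
        // Copy while the token is current; the attribute may invalidate the
        // returned BytesRef on the next incrementToken()/end() call.
        BytesRef bytes = BytesRef.deepCopyOf(termAtt.getBytesRef());
        if (source.incrementToken())
          throw new IllegalArgumentException("analyzer returned too many terms for text: " + text);
        source.end();
        return bytes;
      }
    }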
@@ -267,10 +267,9 @@ public class CursorMarkTest extends SolrTestCaseJ4 {
       String term = TestUtil.randomRealisticUnicodeString(random());
       try (TokenStream ts = analyzer.tokenStream("fake", term)) {
         TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
-        val = termAtt.getBytesRef();
         ts.reset();
         assertTrue(ts.incrementToken());
-        termAtt.fillBytesRef();
+        val = BytesRef.deepCopyOf(termAtt.getBytesRef());
         assertFalse(ts.incrementToken());
         ts.end();
       }