diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 9bc2715f85f..732ba07c5b7 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -116,6 +116,10 @@ New Features * LUCENE-6632: Geo3D: Compute circle planes more accurately. (Karl Wright via David Smiley) +* LUCENE-6653: Added general purpose BytesTermAttribute to basic token + attributes package that can be used for TokenStreams that solely produce + binary terms. (Uwe Schindler) + API Changes * LUCENE-6508: Simplify Lock api, there is now just @@ -295,6 +299,9 @@ Test Framework environments (e.g., read-only dirs). If tests are running without a security manager, an assume cancels test execution automatically. (Uwe Schindler) +* LUCENE-6652: Removed lots of useless Byte(s)TermAttributes all over test + infrastructure. (Uwe Schindler) + Changes in Backwards Compatibility Policy * LUCENE-6553: The iterator returned by the LeafReader.postings method now @@ -305,6 +312,11 @@ Changes in Backwards Compatibility Policy DiversifiedTopDocsCollector can be used instead with a maximum number of hits per key equal to 1. (Adrien Grand) +* LUCENE-6653: The workflow for consuming the TermToBytesRefAttribute was changed: + getBytesRef() now does all work and is called on each token, fillBytesRef() + was removed. The implementation is free to reuse the internal BytesRef + or return a new one on each call. 
(Uwe Schindler) + ======================= Lucene 5.2.1 ======================= Bug Fixes diff --git a/lucene/analysis/common/src/java/org/apache/lucene/collation/tokenattributes/CollatedTermAttributeImpl.java b/lucene/analysis/common/src/java/org/apache/lucene/collation/tokenattributes/CollatedTermAttributeImpl.java index 8336aabc344..e8f7076df23 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/collation/tokenattributes/CollatedTermAttributeImpl.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/collation/tokenattributes/CollatedTermAttributeImpl.java @@ -40,11 +40,12 @@ public class CollatedTermAttributeImpl extends CharTermAttributeImpl { } @Override - public void fillBytesRef() { - BytesRef bytes = getBytesRef(); - bytes.bytes = collator.getCollationKey(toString()).toByteArray(); - bytes.offset = 0; - bytes.length = bytes.bytes.length; + public BytesRef getBytesRef() { + final BytesRef ref = this.builder.get(); + ref.bytes = collator.getCollationKey(toString()).toByteArray(); + ref.offset = 0; + ref.length = ref.bytes.length; + return ref; } } diff --git a/lucene/analysis/icu/src/java/org/apache/lucene/collation/tokenattributes/ICUCollatedTermAttributeImpl.java b/lucene/analysis/icu/src/java/org/apache/lucene/collation/tokenattributes/ICUCollatedTermAttributeImpl.java index e9ee831fea7..fdbaf74e610 100644 --- a/lucene/analysis/icu/src/java/org/apache/lucene/collation/tokenattributes/ICUCollatedTermAttributeImpl.java +++ b/lucene/analysis/icu/src/java/org/apache/lucene/collation/tokenattributes/ICUCollatedTermAttributeImpl.java @@ -45,11 +45,12 @@ public class ICUCollatedTermAttributeImpl extends CharTermAttributeImpl { } @Override - public void fillBytesRef() { - BytesRef bytes = getBytesRef(); + public BytesRef getBytesRef() { collator.getRawCollationKey(toString(), key); - bytes.bytes = key.bytes; - bytes.offset = 0; - bytes.length = key.size; + final BytesRef ref = this.builder.get(); + ref.bytes = key.bytes; + ref.offset = 0; + 
ref.length = key.size; + return ref; } } diff --git a/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorphosyntacticTagsAttributeImpl.java b/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorphosyntacticTagsAttributeImpl.java index aad39bcbca3..938308e0f64 100644 --- a/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorphosyntacticTagsAttributeImpl.java +++ b/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorphosyntacticTagsAttributeImpl.java @@ -96,9 +96,4 @@ public class MorphosyntacticTagsAttributeImpl extends AttributeImpl this.copyTo(cloned); return cloned; } - - @Override - public String toString() { - return tags == null ? "" : tags.toString(); - } } diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTokensTask.java b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTokensTask.java index f8bfaeffce4..57c206e73c5 100644 --- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTokensTask.java +++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTokensTask.java @@ -87,7 +87,7 @@ public class ReadTokensTask extends PerfTask { TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class); while(stream.incrementToken()) { - termAtt.fillBytesRef(); + termAtt.getBytesRef(); tokenCount++; } stream.end(); diff --git a/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java b/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java index d894f01cd23..0c82cb55b55 100644 --- a/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java +++ b/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java @@ -976,8 +976,6 @@ public class TestPerfTasksLogic extends BenchmarkTestCase { assertTrue(ts2.incrementToken()); BytesRef bytes1 = 
termAtt1.getBytesRef(); BytesRef bytes2 = termAtt2.getBytesRef(); - termAtt1.fillBytesRef(); - termAtt2.fillBytesRef(); assertEquals(bytes1, bytes2); assertFalse(ts1.incrementToken()); assertFalse(ts2.incrementToken()); diff --git a/lucene/codecs/src/test/org/apache/lucene/codecs/autoprefix/TestAutoPrefixTerms.java b/lucene/codecs/src/test/org/apache/lucene/codecs/autoprefix/TestAutoPrefixTerms.java index d238e2d6c19..7514f27f9df 100644 --- a/lucene/codecs/src/test/org/apache/lucene/codecs/autoprefix/TestAutoPrefixTerms.java +++ b/lucene/codecs/src/test/org/apache/lucene/codecs/autoprefix/TestAutoPrefixTerms.java @@ -29,8 +29,6 @@ import java.util.Map; import java.util.Set; import org.apache.lucene.analysis.MockAnalyzer; -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute; import org.apache.lucene.codecs.Codec; import org.apache.lucene.document.BinaryDocValuesField; import org.apache.lucene.document.Document; @@ -58,7 +56,6 @@ import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.PrefixQuery; import org.apache.lucene.search.TermRangeQuery; import org.apache.lucene.store.Directory; -import org.apache.lucene.util.AttributeImpl; import org.apache.lucene.util.AttributeSource; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.FixedBitSet; @@ -234,7 +231,7 @@ public class TestAutoPrefixTerms extends LuceneTestCase { for(Integer term : terms) { Document doc = new Document(); - doc.add(new BinaryField("field", intToBytes(term))); + doc.add(newStringField("field", intToBytes(term), Field.Store.NO)); doc.add(new NumericDocValuesField("field", term)); w.addDocument(doc); } @@ -506,78 +503,6 @@ public class TestAutoPrefixTerms extends LuceneTestCase { dir.close(); } - static final class BinaryTokenStream extends TokenStream { - private final ByteTermAttribute bytesAtt = addAttribute(ByteTermAttribute.class); - private boolean available = true; - - public 
BinaryTokenStream(BytesRef bytes) { - bytesAtt.setBytesRef(bytes); - } - - @Override - public boolean incrementToken() { - if (available) { - clearAttributes(); - available = false; - return true; - } - return false; - } - - @Override - public void reset() { - available = true; - } - - public interface ByteTermAttribute extends TermToBytesRefAttribute { - void setBytesRef(BytesRef bytes); - } - - public static class ByteTermAttributeImpl extends AttributeImpl implements ByteTermAttribute,TermToBytesRefAttribute { - private BytesRef bytes; - - @Override - public void fillBytesRef() { - // no-op: the bytes was already filled by our owner's incrementToken - } - - @Override - public BytesRef getBytesRef() { - return bytes; - } - - @Override - public void setBytesRef(BytesRef bytes) { - this.bytes = bytes; - } - - @Override - public void clear() {} - - @Override - public void copyTo(AttributeImpl target) { - ByteTermAttributeImpl other = (ByteTermAttributeImpl) target; - other.bytes = bytes; - } - } - } - - /** Basically a StringField that accepts binary term. */ - private static class BinaryField extends Field { - - final static FieldType TYPE; - static { - TYPE = new FieldType(StringField.TYPE_NOT_STORED); - // Necessary so our custom tokenStream is used by Field.tokenStream: - TYPE.setTokenized(true); - TYPE.freeze(); - } - - public BinaryField(String name, BytesRef value) { - super(name, new BinaryTokenStream(value), TYPE); - } - } - /** Helper class to ensure auto-prefix terms 1) never overlap one another, and 2) are used when they should be. 
*/ private static class VerifyAutoPrefixTerms { final FixedBitSet allHits; diff --git a/lucene/core/src/java/org/apache/lucene/analysis/NumericTokenStream.java b/lucene/core/src/java/org/apache/lucene/analysis/NumericTokenStream.java index b4bdb4e0212..d3210f6119d 100644 --- a/lucene/core/src/java/org/apache/lucene/analysis/NumericTokenStream.java +++ b/lucene/core/src/java/org/apache/lucene/analysis/NumericTokenStream.java @@ -158,17 +158,13 @@ public final class NumericTokenStream extends TokenStream { @Override public BytesRef getBytesRef() { - return bytes.get(); - } - - @Override - public void fillBytesRef() { assert valueSize == 64 || valueSize == 32; if (valueSize == 64) { NumericUtils.longToPrefixCoded(value, shift, bytes); } else { NumericUtils.intToPrefixCoded((int) value, shift, bytes); } + return bytes.get(); } @Override @@ -201,8 +197,7 @@ public final class NumericTokenStream extends TokenStream { @Override public void reflectWith(AttributeReflector reflector) { - fillBytesRef(); - reflector.reflect(TermToBytesRefAttribute.class, "bytes", bytes.toBytesRef()); + reflector.reflect(TermToBytesRefAttribute.class, "bytes", getBytesRef()); reflector.reflect(NumericTermAttribute.class, "shift", shift); reflector.reflect(NumericTermAttribute.class, "rawValue", getRawValue()); reflector.reflect(NumericTermAttribute.class, "valueSize", valueSize); @@ -219,7 +214,7 @@ public final class NumericTokenStream extends TokenStream { NumericTermAttributeImpl t = (NumericTermAttributeImpl)super.clone(); // Do a deep clone t.bytes = new BytesRefBuilder(); - t.bytes.copyBytes(bytes.get()); + t.bytes.copyBytes(getBytesRef()); return t; } diff --git a/lucene/core/src/java/org/apache/lucene/analysis/TokenStreamToAutomaton.java b/lucene/core/src/java/org/apache/lucene/analysis/TokenStreamToAutomaton.java index 5899e998a1f..b62a5ff01eb 100644 --- a/lucene/core/src/java/org/apache/lucene/analysis/TokenStreamToAutomaton.java +++ 
b/lucene/core/src/java/org/apache/lucene/analysis/TokenStreamToAutomaton.java @@ -105,8 +105,6 @@ public class TokenStreamToAutomaton { final PositionLengthAttribute posLengthAtt = in.addAttribute(PositionLengthAttribute.class); final OffsetAttribute offsetAtt = in.addAttribute(OffsetAttribute.class); - final BytesRef term = termBytesAtt.getBytesRef(); - in.reset(); // Only temporarily holds states ahead of our current @@ -157,8 +155,7 @@ public class TokenStreamToAutomaton { final int endPos = pos + posLengthAtt.getPositionLength(); - termBytesAtt.fillBytesRef(); - final BytesRef termUTF8 = changeToken(term); + final BytesRef termUTF8 = changeToken(termBytesAtt.getBytesRef()); int[] termUnicode = null; final Position endPosData = positions.get(endPos); if (endPosData.arriving == -1) { diff --git a/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/BytesTermAttribute.java b/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/BytesTermAttribute.java new file mode 100644 index 00000000000..2085e5fa386 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/BytesTermAttribute.java @@ -0,0 +1,31 @@ +package org.apache.lucene.analysis.tokenattributes; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.util.BytesRef; + +/** + * This attribute can be used if you have the raw term bytes to be indexed. + * It can be used as replacement for {@link CharTermAttribute}, if binary + * terms should be indexed. + * @lucene.internal + */ +public interface BytesTermAttribute extends TermToBytesRefAttribute { + /** Sets the {@link BytesRef} of the term */ + public void setBytesRef(BytesRef bytes); +} \ No newline at end of file diff --git a/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/BytesTermAttributeImpl.java b/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/BytesTermAttributeImpl.java new file mode 100644 index 00000000000..d3f8163f402 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/BytesTermAttributeImpl.java @@ -0,0 +1,65 @@ +package org.apache.lucene.analysis.tokenattributes; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.util.AttributeImpl; +import org.apache.lucene.util.AttributeReflector; +import org.apache.lucene.util.BytesRef; + +/** Implementation class for {@link BytesTermAttribute}. + * @lucene.internal + */ +public class BytesTermAttributeImpl extends AttributeImpl implements BytesTermAttribute, TermToBytesRefAttribute { + private BytesRef bytes; + + /** Initialize this attribute with no bytes. */ + public BytesTermAttributeImpl() {} + + @Override + public BytesRef getBytesRef() { + return bytes; + } + + @Override + public void setBytesRef(BytesRef bytes) { + this.bytes = bytes; + } + + @Override + public void clear() { + this.bytes = null; + } + + @Override + public void copyTo(AttributeImpl target) { + BytesTermAttributeImpl other = (BytesTermAttributeImpl) target; + other.bytes = bytes == null ? null : BytesRef.deepCopyOf(bytes); /* bytes is null before setBytesRef() and after clear() */ + } + + @Override + public AttributeImpl clone() { + BytesTermAttributeImpl c = (BytesTermAttributeImpl)super.clone(); + copyTo(c); + return c; + } + + @Override + public void reflectWith(AttributeReflector reflector) { + reflector.reflect(TermToBytesRefAttribute.class, "bytes", bytes); + } +} \ No newline at end of file diff --git a/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttributeImpl.java b/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttributeImpl.java index 6d325a1749d..d75da21dd68 100644 --- a/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttributeImpl.java +++ b/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttributeImpl.java @@ -33,6 +33,9 @@ public class CharTermAttributeImpl extends AttributeImpl implements CharTermAttr private char[] termBuffer = new char[ArrayUtil.oversize(MIN_BUFFER_SIZE, RamUsageEstimator.NUM_BYTES_CHAR)]; private int termLength = 0; + /** May be used by subclasses to convert to different charsets / encodings for implementing {@link #getBytesRef()}.
*/ + protected BytesRefBuilder builder = new BytesRefBuilder(); + /** Initialize this attribute with empty term text */ public CharTermAttributeImpl() {} @@ -83,16 +86,11 @@ public class CharTermAttributeImpl extends AttributeImpl implements CharTermAttr } // *** TermToBytesRefAttribute interface *** - private BytesRefBuilder bytes = new BytesRefBuilder(); - - @Override - public void fillBytesRef() { - bytes.copyChars(termBuffer, 0, termLength); - } @Override public BytesRef getBytesRef() { - return bytes.get(); + builder.copyChars(termBuffer, 0, termLength); + return builder.get(); } // *** CharSequence interface *** @@ -228,8 +226,8 @@ public class CharTermAttributeImpl extends AttributeImpl implements CharTermAttr // Do a deep clone t.termBuffer = new char[this.termLength]; System.arraycopy(this.termBuffer, 0, t.termBuffer, 0, this.termLength); - t.bytes = new BytesRefBuilder(); - t.bytes.copyBytes(bytes.get()); + t.builder = new BytesRefBuilder(); + t.builder.copyBytes(builder.get()); return t; } @@ -271,8 +269,7 @@ public class CharTermAttributeImpl extends AttributeImpl implements CharTermAttr @Override public void reflectWith(AttributeReflector reflector) { reflector.reflect(CharTermAttribute.class, "term", toString()); - fillBytesRef(); - reflector.reflect(TermToBytesRefAttribute.class, "bytes", bytes.toBytesRef()); + reflector.reflect(TermToBytesRefAttribute.class, "bytes", getBytesRef()); } @Override diff --git a/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/TermToBytesRefAttribute.java b/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/TermToBytesRefAttribute.java index d9d154a9c4d..f6630da760f 100644 --- a/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/TermToBytesRefAttribute.java +++ b/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/TermToBytesRefAttribute.java @@ -24,17 +24,12 @@ import org.apache.lucene.util.BytesRef; * This attribute is requested by TermsHashPerField to index the 
contents. * This attribute can be used to customize the final byte[] encoding of terms. *

- * Consumers of this attribute call {@link #getBytesRef()} up-front, and then - * invoke {@link #fillBytesRef()} for each term. Example: + * Consumers of this attribute call {@link #getBytesRef()} for each term. Example: *

  *   final TermToBytesRefAttribute termAtt = tokenStream.getAttribute(TermToBytesRefAttribute.class);
- *   final BytesRef bytes = termAtt.getBytesRef();
  *
  *   while (tokenStream.incrementToken() {
- *
- *     // you must call termAtt.fillBytesRef() before doing something with the bytes.
- *     // this encodes the term value (internally it might be a char[], etc) into the bytes.
- *     int hashCode = termAtt.fillBytesRef();
+ *     final BytesRef bytes = termAtt.getBytesRef();
  *
  *     if (isInteresting(bytes)) {
  *     
@@ -42,27 +37,21 @@ import org.apache.lucene.util.BytesRef;
  *       // you should make a copy if you need persistent access to the bytes, otherwise they will
  *       // be rewritten across calls to incrementToken()
  *
- *       doSomethingWith(new BytesRef(bytes));
+ *       doSomethingWith(BytesRef.deepCopyOf(bytes));
  *     }
  *   }
  *   ...
  * 
- * @lucene.experimental This is a very expert API, please use - * {@link CharTermAttributeImpl} and its implementation of this method - * for UTF-8 terms. + * @lucene.internal This is a very expert and internal API, please use + * {@link CharTermAttribute} and its implementation for UTF-8 terms; to + * index binary terms, use {@link BytesTermAttribute} and its implementation. */ public interface TermToBytesRefAttribute extends Attribute { - - /** - * Updates the bytes {@link #getBytesRef()} to contain this term's - * final encoding. - */ - public void fillBytesRef(); /** - * Retrieve this attribute's BytesRef. The bytes are updated - * from the current term when the consumer calls {@link #fillBytesRef()}. - * @return this Attributes internal BytesRef. + * Retrieve this attribute's BytesRef. The bytes are updated from the current term. + * The implementation may return a new instance or keep the previous one. + * @return a BytesRef to be indexed (only stays valid until token stream gets incremented) */ public BytesRef getBytesRef(); } diff --git a/lucene/core/src/java/org/apache/lucene/document/Field.java b/lucene/core/src/java/org/apache/lucene/document/Field.java index 3bd120ab31c..d0ca60e441d 100644 --- a/lucene/core/src/java/org/apache/lucene/document/Field.java +++ b/lucene/core/src/java/org/apache/lucene/document/Field.java @@ -23,17 +23,14 @@ import java.io.Reader; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.NumericTokenStream; import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.BytesTermAttribute; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; -import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute; import org.apache.lucene.document.FieldType.NumericType; -import org.apache.lucene.index.FieldInvertState; // javadocs import org.apache.lucene.index.IndexOptions; -import 
org.apache.lucene.index.IndexWriter; // javadocs import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.IndexableFieldType; import org.apache.lucene.index.StorableField; -import org.apache.lucene.util.AttributeImpl; import org.apache.lucene.util.BytesRef; /** @@ -570,71 +567,47 @@ public class Field implements IndexableField, StorableField { } private static final class BinaryTokenStream extends TokenStream { - private final ByteTermAttribute bytesAtt = addAttribute(ByteTermAttribute.class); - - // Do not init this to true, becase caller must first call reset: - private boolean available; + private final BytesTermAttribute bytesAtt = addAttribute(BytesTermAttribute.class); + private boolean used = true; + private BytesRef value; - public BinaryTokenStream() { + /** Creates a new TokenStream that returns a BytesRef as single token. + *

Warning: Does not initialize the value, you must call + * {@link #setValue(BytesRef)} afterwards! + */ + BinaryTokenStream() { } public void setValue(BytesRef value) { - bytesAtt.setBytesRef(value); + this.value = value; } @Override public boolean incrementToken() { - if (available) { - clearAttributes(); - available = false; - return true; + if (used) { + return false; } - return false; + clearAttributes(); + bytesAtt.setBytesRef(value); + used = true; + return true; } @Override public void reset() { - available = true; + used = false; } - - public interface ByteTermAttribute extends TermToBytesRefAttribute { - public void setBytesRef(BytesRef bytes); - } - - public static class ByteTermAttributeImpl extends AttributeImpl implements ByteTermAttribute, TermToBytesRefAttribute { - private BytesRef bytes; - - @Override - public void fillBytesRef() { - // no-op: the bytes was already filled by our owner's incrementToken - } - - @Override - public BytesRef getBytesRef() { - return bytes; - } - @Override - public void setBytesRef(BytesRef bytes) { - this.bytes = bytes; - } - - @Override - public void clear() { - } - - @Override - public void copyTo(AttributeImpl target) { - ByteTermAttributeImpl other = (ByteTermAttributeImpl) target; - other.bytes = bytes; - } + @Override + public void close() { + value = null; } } private static final class StringTokenStream extends TokenStream { private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class); private final OffsetAttribute offsetAttribute = addAttribute(OffsetAttribute.class); - private boolean used = false; + private boolean used = true; private String value = null; /** Creates a new TokenStream that returns a String as single token. 
diff --git a/lucene/core/src/java/org/apache/lucene/index/TermsHashPerField.java b/lucene/core/src/java/org/apache/lucene/index/TermsHashPerField.java index 4c51e5c7122..3ae00e697b7 100644 --- a/lucene/core/src/java/org/apache/lucene/index/TermsHashPerField.java +++ b/lucene/core/src/java/org/apache/lucene/index/TermsHashPerField.java @@ -36,7 +36,6 @@ abstract class TermsHashPerField implements Comparable { protected final DocumentsWriterPerThread.DocState docState; protected final FieldInvertState fieldState; TermToBytesRefAttribute termAtt; - BytesRef termBytesRef; // Copied from our perThread final IntBlockPool intPool; @@ -145,13 +144,10 @@ abstract class TermsHashPerField implements Comparable { * entry point (for first TermsHash); postings use this * API. */ void add() throws IOException { - - termAtt.fillBytesRef(); - // We are first in the chain so we must "intern" the // term text into textStart address // Get the text & hash of this term. - int termID = bytesHash.add(termBytesRef); + int termID = bytesHash.add(termAtt.getBytesRef()); //System.out.println("add term=" + termBytesRef.utf8ToString() + " doc=" + docState.docID + " termID=" + termID); @@ -292,10 +288,6 @@ abstract class TermsHashPerField implements Comparable { * document. 
*/ boolean start(IndexableField field, boolean first) { termAtt = fieldState.termAttribute; - // EmptyTokenStream can have null term att - if (termAtt != null) { - termBytesRef = termAtt.getBytesRef(); - } if (nextPerField != null) { doNextCall = nextPerField.start(field, first); } diff --git a/lucene/core/src/java/org/apache/lucene/util/QueryBuilder.java b/lucene/core/src/java/org/apache/lucene/util/QueryBuilder.java index fb3a3f62f72..7faeb0a43d4 100644 --- a/lucene/core/src/java/org/apache/lucene/util/QueryBuilder.java +++ b/lucene/core/src/java/org/apache/lucene/util/QueryBuilder.java @@ -267,15 +267,13 @@ public class QueryBuilder { */ private Query analyzeTerm(String field, TokenStream stream) throws IOException { TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class); - BytesRef bytes = termAtt.getBytesRef(); stream.reset(); if (!stream.incrementToken()) { throw new AssertionError(); } - termAtt.fillBytesRef(); - return newTermQuery(new Term(field, BytesRef.deepCopyOf(bytes))); + return newTermQuery(new Term(field, BytesRef.deepCopyOf(termAtt.getBytesRef()))); } /** @@ -286,12 +284,10 @@ public class QueryBuilder { q.setDisableCoord(true); TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class); - BytesRef bytes = termAtt.getBytesRef(); stream.reset(); while (stream.incrementToken()) { - termAtt.fillBytesRef(); - Query currentQuery = newTermQuery(new Term(field, BytesRef.deepCopyOf(bytes))); + Query currentQuery = newTermQuery(new Term(field, BytesRef.deepCopyOf(termAtt.getBytesRef()))); q.add(currentQuery, BooleanClause.Occur.SHOULD); } @@ -317,18 +313,15 @@ public class QueryBuilder { BooleanQuery.Builder currentQuery = newBooleanQuery(true); TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class); - BytesRef bytes = termAtt.getBytesRef(); - PositionIncrementAttribute posIncrAtt = stream.getAttribute(PositionIncrementAttribute.class); stream.reset(); while 
(stream.incrementToken()) { - termAtt.fillBytesRef(); if (posIncrAtt.getPositionIncrement() != 0) { add(q, currentQuery.build(), operator); currentQuery = newBooleanQuery(true); } - currentQuery.add(newTermQuery(new Term(field, BytesRef.deepCopyOf(bytes))), BooleanClause.Occur.SHOULD); + currentQuery.add(newTermQuery(new Term(field, BytesRef.deepCopyOf(termAtt.getBytesRef()))), BooleanClause.Occur.SHOULD); } add(q, currentQuery.build(), operator); @@ -343,21 +336,17 @@ public class QueryBuilder { builder.setSlop(slop); TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class); - BytesRef bytes = termAtt.getBytesRef(); - PositionIncrementAttribute posIncrAtt = stream.getAttribute(PositionIncrementAttribute.class); int position = -1; stream.reset(); while (stream.incrementToken()) { - termAtt.fillBytesRef(); - if (enablePositionIncrements) { position += posIncrAtt.getPositionIncrement(); } else { position += 1; } - builder.add(new Term(field, bytes), position); + builder.add(new Term(field, termAtt.getBytesRef()), position); } return builder.build(); @@ -371,7 +360,6 @@ public class QueryBuilder { mpq.setSlop(slop); TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class); - BytesRef bytes = termAtt.getBytesRef(); PositionIncrementAttribute posIncrAtt = stream.getAttribute(PositionIncrementAttribute.class); int position = -1; @@ -379,7 +367,6 @@ public class QueryBuilder { List multiTerms = new ArrayList<>(); stream.reset(); while (stream.incrementToken()) { - termAtt.fillBytesRef(); int positionIncrement = posIncrAtt.getPositionIncrement(); if (positionIncrement > 0 && multiTerms.size() > 0) { @@ -391,7 +378,7 @@ public class QueryBuilder { multiTerms.clear(); } position += positionIncrement; - multiTerms.add(new Term(field, BytesRef.deepCopyOf(bytes))); + multiTerms.add(new Term(field, BytesRef.deepCopyOf(termAtt.getBytesRef()))); } if (enablePositionIncrements) { diff --git 
a/lucene/core/src/test/org/apache/lucene/analysis/TestNumericTokenStream.java b/lucene/core/src/test/org/apache/lucene/analysis/TestNumericTokenStream.java index 5424eb397dd..331a70cab7a 100644 --- a/lucene/core/src/test/org/apache/lucene/analysis/TestNumericTokenStream.java +++ b/lucene/core/src/test/org/apache/lucene/analysis/TestNumericTokenStream.java @@ -17,7 +17,6 @@ package org.apache.lucene.analysis; * limitations under the License. */ -import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.NumericUtils; import org.apache.lucene.analysis.NumericTokenStream.NumericTermAttributeImpl; import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute; @@ -40,14 +39,12 @@ public class TestNumericTokenStream extends BaseTokenStreamTestCase { assertNotNull(typeAtt); final NumericTokenStream.NumericTermAttribute numericAtt = stream.getAttribute(NumericTokenStream.NumericTermAttribute.class); assertNotNull(numericAtt); - final BytesRef bytes = bytesAtt.getBytesRef(); stream.reset(); assertEquals(64, numericAtt.getValueSize()); for (int shift=0; shift<64; shift+=NumericUtils.PRECISION_STEP_DEFAULT) { assertTrue("New token is available", stream.incrementToken()); assertEquals("Shift value wrong", shift, numericAtt.getShift()); - bytesAtt.fillBytesRef(); - assertEquals("Term is incorrectly encoded", lvalue & ~((1L << shift) - 1L), NumericUtils.prefixCodedToLong(bytes)); + assertEquals("Term is incorrectly encoded", lvalue & ~((1L << shift) - 1L), NumericUtils.prefixCodedToLong(bytesAtt.getBytesRef())); assertEquals("Term raw value is incorrectly encoded", lvalue & ~((1L << shift) - 1L), numericAtt.getRawValue()); assertEquals("Type incorrect", (shift == 0) ? 
NumericTokenStream.TOKEN_TYPE_FULL_PREC : NumericTokenStream.TOKEN_TYPE_LOWER_PREC, typeAtt.type()); } @@ -65,14 +62,12 @@ public class TestNumericTokenStream extends BaseTokenStreamTestCase { assertNotNull(typeAtt); final NumericTokenStream.NumericTermAttribute numericAtt = stream.getAttribute(NumericTokenStream.NumericTermAttribute.class); assertNotNull(numericAtt); - final BytesRef bytes = bytesAtt.getBytesRef(); stream.reset(); assertEquals(32, numericAtt.getValueSize()); for (int shift=0; shift<32; shift+=NumericUtils.PRECISION_STEP_DEFAULT) { assertTrue("New token is available", stream.incrementToken()); assertEquals("Shift value wrong", shift, numericAtt.getShift()); - bytesAtt.fillBytesRef(); - assertEquals("Term is incorrectly encoded", ivalue & ~((1 << shift) - 1), NumericUtils.prefixCodedToInt(bytes)); + assertEquals("Term is incorrectly encoded", ivalue & ~((1 << shift) - 1), NumericUtils.prefixCodedToInt(bytesAtt.getBytesRef())); assertEquals("Term raw value is incorrectly encoded", ((long) ivalue) & ~((1L << shift) - 1L), numericAtt.getRawValue()); assertEquals("Type incorrect", (shift == 0) ? 
NumericTokenStream.TOKEN_TYPE_FULL_PREC : NumericTokenStream.TOKEN_TYPE_LOWER_PREC, typeAtt.type()); } @@ -123,6 +118,7 @@ public class TestNumericTokenStream extends BaseTokenStreamTestCase { public void testAttributeClone() throws Exception { NumericTermAttributeImpl att = new NumericTermAttributeImpl(); + att.init(1234L, 64, 8, 0); // set some value, to make getBytesRef() work NumericTermAttributeImpl copy = TestCharTermAttributeImpl.assertCloneIsEqual(att); assertNotSame(att.getBytesRef(), copy.getBytesRef()); NumericTermAttributeImpl copy2 = TestCharTermAttributeImpl.assertCopyIsEqual(att); diff --git a/lucene/core/src/test/org/apache/lucene/index/BinaryTokenStream.java b/lucene/core/src/test/org/apache/lucene/index/BinaryTokenStream.java deleted file mode 100644 index fcaa4b9e5e1..00000000000 --- a/lucene/core/src/test/org/apache/lucene/index/BinaryTokenStream.java +++ /dev/null @@ -1,86 +0,0 @@ -package org.apache.lucene.index; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute; -import org.apache.lucene.util.AttributeImpl; -import org.apache.lucene.util.BytesRef; -import org.apache.lucene.analysis.CannedBinaryTokenStream; // javadocs - -/** - * A binary tokenstream that lets you index a single - * binary token (BytesRef value). - * - * @see CannedBinaryTokenStream - */ -public final class BinaryTokenStream extends TokenStream { - private final ByteTermAttribute bytesAtt = addAttribute(ByteTermAttribute.class); - private boolean available = true; - - public BinaryTokenStream(BytesRef bytes) { - bytesAtt.setBytesRef(bytes); - } - - @Override - public boolean incrementToken() { - if (available) { - clearAttributes(); - available = false; - return true; - } - return false; - } - - @Override - public void reset() { - available = true; - } - - public interface ByteTermAttribute extends TermToBytesRefAttribute { - public void setBytesRef(BytesRef bytes); - } - - public static class ByteTermAttributeImpl extends AttributeImpl implements ByteTermAttribute,TermToBytesRefAttribute { - private BytesRef bytes; - - @Override - public void fillBytesRef() { - // no-op: the bytes was already filled by our owner's incrementToken - } - - @Override - public BytesRef getBytesRef() { - return bytes; - } - - @Override - public void setBytesRef(BytesRef bytes) { - this.bytes = bytes; - } - - @Override - public void clear() {} - - @Override - public void copyTo(AttributeImpl target) { - ByteTermAttributeImpl other = (ByteTermAttributeImpl) target; - other.bytes = bytes; - } - } -} diff --git a/lucene/core/src/test/org/apache/lucene/index/Test2BTerms.java b/lucene/core/src/test/org/apache/lucene/index/Test2BTerms.java index 49df315e4ee..a505ccf3725 100644 --- a/lucene/core/src/test/org/apache/lucene/index/Test2BTerms.java +++ b/lucene/core/src/test/org/apache/lucene/index/Test2BTerms.java @@ -108,11 +108,6 @@ public class Test2BTerms 
extends LuceneTestCase { } private final static class MyTermAttributeImpl extends AttributeImpl implements TermToBytesRefAttribute { - @Override - public void fillBytesRef() { - // no-op: the bytes was already filled by our owner's incrementToken - } - @Override public BytesRef getBytesRef() { return bytes; @@ -122,18 +117,9 @@ public class Test2BTerms extends LuceneTestCase { public void clear() { } - @Override - public boolean equals(Object other) { - return other == this; - } - - @Override - public int hashCode() { - return System.identityHashCode(this); - } - @Override public void copyTo(AttributeImpl target) { + throw new UnsupportedOperationException(); } @Override diff --git a/lucene/core/src/test/org/apache/lucene/index/TestBinaryTerms.java b/lucene/core/src/test/org/apache/lucene/index/TestBinaryTerms.java index 9953fc7a877..1257edae1ea 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestBinaryTerms.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestBinaryTerms.java @@ -22,7 +22,6 @@ import java.io.IOException; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; -import org.apache.lucene.document.TextField; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TopDocs; @@ -38,7 +37,6 @@ public class TestBinaryTerms extends LuceneTestCase { Directory dir = newDirectory(); RandomIndexWriter iw = new RandomIndexWriter(random(), dir); BytesRef bytes = new BytesRef(2); - BinaryTokenStream tokenStream = new BinaryTokenStream(bytes); for (int i = 0; i < 256; i++) { bytes.bytes[0] = (byte) i; @@ -47,8 +45,8 @@ public class TestBinaryTerms extends LuceneTestCase { Document doc = new Document(); FieldType customType = new FieldType(); customType.setStored(true); - doc.add(new Field("id", "" + i, customType)); - doc.add(new TextField("bytes", tokenStream)); + doc.add(newField("id", "" + i, customType)); + 
doc.add(newStringField("bytes", bytes, Field.Store.NO)); iw.addDocument(doc); } diff --git a/lucene/core/src/test/org/apache/lucene/index/TestLongPostings.java b/lucene/core/src/test/org/apache/lucene/index/TestLongPostings.java index afa39ffc25c..1fe64067c0b 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestLongPostings.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestLongPostings.java @@ -49,14 +49,13 @@ public class TestLongPostings extends LuceneTestCase { } try (TokenStream ts = a.tokenStream("foo", s)) { final TermToBytesRefAttribute termAtt = ts.getAttribute(TermToBytesRefAttribute.class); - final BytesRef termBytes = termAtt.getBytesRef(); ts.reset(); int count = 0; boolean changed = false; while(ts.incrementToken()) { - termAtt.fillBytesRef(); + final BytesRef termBytes = termAtt.getBytesRef(); if (count == 0 && !termBytes.utf8ToString().equals(s)) { // The value was changed during analysis. Keep iterating so the // tokenStream is exhausted. diff --git a/lucene/core/src/test/org/apache/lucene/search/TestPrefixQuery.java b/lucene/core/src/test/org/apache/lucene/search/TestPrefixQuery.java index 64018cbaa70..d23f7f44186 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestPrefixQuery.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestPrefixQuery.java @@ -23,17 +23,12 @@ import java.util.HashSet; import java.util.List; import java.util.Set; -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; -import org.apache.lucene.document.FieldType; -import org.apache.lucene.document.StringField; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; import org.apache.lucene.store.Directory; -import org.apache.lucene.util.AttributeImpl; import org.apache.lucene.util.BytesRef; import 
org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.StringHelper; @@ -94,78 +89,6 @@ public class TestPrefixQuery extends LuceneTestCase { directory.close(); } - static final class BinaryTokenStream extends TokenStream { - private final ByteTermAttribute bytesAtt = addAttribute(ByteTermAttribute.class); - private boolean available = true; - - public BinaryTokenStream(BytesRef bytes) { - bytesAtt.setBytesRef(bytes); - } - - @Override - public boolean incrementToken() { - if (available) { - clearAttributes(); - available = false; - return true; - } - return false; - } - - @Override - public void reset() { - available = true; - } - - public interface ByteTermAttribute extends TermToBytesRefAttribute { - public void setBytesRef(BytesRef bytes); - } - - public static class ByteTermAttributeImpl extends AttributeImpl implements ByteTermAttribute,TermToBytesRefAttribute { - private BytesRef bytes; - - @Override - public void fillBytesRef() { - // no-op: the bytes was already filled by our owner's incrementToken - } - - @Override - public BytesRef getBytesRef() { - return bytes; - } - - @Override - public void setBytesRef(BytesRef bytes) { - this.bytes = bytes; - } - - @Override - public void clear() {} - - @Override - public void copyTo(AttributeImpl target) { - ByteTermAttributeImpl other = (ByteTermAttributeImpl) target; - other.bytes = bytes; - } - } - } - - /** Basically a StringField that accepts binary term. 
*/ - private static class BinaryField extends Field { - - final static FieldType TYPE; - static { - TYPE = new FieldType(StringField.TYPE_NOT_STORED); - // Necessary so our custom tokenStream is used by Field.tokenStream: - TYPE.setTokenized(true); - TYPE.freeze(); - } - - public BinaryField(String name, BytesRef value) { - super(name, new BinaryTokenStream(value), TYPE); - } - } - public void testRandomBinaryPrefix() throws Exception { Directory dir = newDirectory(); RandomIndexWriter w = new RandomIndexWriter(random(), dir); @@ -182,7 +105,7 @@ public class TestPrefixQuery extends LuceneTestCase { Collections.shuffle(termsList, random()); for(BytesRef term : termsList) { Document doc = new Document(); - doc.add(new BinaryField("field", term)); + doc.add(newStringField("field", term, Field.Store.NO)); w.addDocument(doc); } diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java b/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java index b51d71aeba5..2927c143186 100644 --- a/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java +++ b/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java @@ -167,14 +167,11 @@ public abstract class AbstractTestCase extends LuceneTestCase { try (TokenStream tokenStream = analyzer.tokenStream(field, text)) { TermToBytesRefAttribute termAttribute = tokenStream.getAttribute(TermToBytesRefAttribute.class); - - BytesRef bytesRef = termAttribute.getBytesRef(); - + tokenStream.reset(); while (tokenStream.incrementToken()) { - termAttribute.fillBytesRef(); - bytesRefs.add(BytesRef.deepCopyOf(bytesRef)); + bytesRefs.add(BytesRef.deepCopyOf(termAttribute.getBytesRef())); } tokenStream.end(); diff --git a/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java b/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java index 0e66909e980..dd3e556e770 100644 --- 
a/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java +++ b/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java @@ -453,18 +453,16 @@ public class MemoryIndex { PositionIncrementAttribute posIncrAttribute = stream.addAttribute(PositionIncrementAttribute.class); OffsetAttribute offsetAtt = stream.addAttribute(OffsetAttribute.class); PayloadAttribute payloadAtt = storePayloads ? stream.addAttribute(PayloadAttribute.class) : null; - BytesRef ref = termAtt.getBytesRef(); stream.reset(); while (stream.incrementToken()) { - termAtt.fillBytesRef(); // if (DEBUG) System.err.println("token='" + term + "'"); numTokens++; final int posIncr = posIncrAttribute.getPositionIncrement(); if (posIncr == 0) numOverlapTokens++; pos += posIncr; - int ord = terms.add(ref); + int ord = terms.add(termAtt.getBytesRef()); if (ord < 0) { ord = (-ord) - 1; postingsWriter.reset(sliceArray.end[ord]); diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserBase.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserBase.java index 6e4c6577215..be79878d416 100644 --- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserBase.java +++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserBase.java @@ -612,15 +612,14 @@ public abstract class QueryParserBase extends QueryBuilder implements CommonQuer source.reset(); TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class); - BytesRef bytes = termAtt.getBytesRef(); if (!source.incrementToken()) throw new IllegalArgumentException("analyzer returned no terms for multiTerm term: " + part); - termAtt.fillBytesRef(); + BytesRef bytes = BytesRef.deepCopyOf(termAtt.getBytesRef()); if (source.incrementToken()) throw new IllegalArgumentException("analyzer returned too many terms for multiTerm term: " + part); source.end(); - return BytesRef.deepCopyOf(bytes); + return bytes; } catch 
(IOException e) { throw new RuntimeException("Error analyzing multiTerm term: " + part, e); } diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/xml/builders/SpanOrTermsBuilder.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/xml/builders/SpanOrTermsBuilder.java index e7978d130b4..7a59d619f06 100644 --- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/xml/builders/SpanOrTermsBuilder.java +++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/xml/builders/SpanOrTermsBuilder.java @@ -53,11 +53,9 @@ public class SpanOrTermsBuilder extends SpanBuilderBase { try (TokenStream ts = analyzer.tokenStream(fieldName, value)) { TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class); - BytesRef bytes = termAtt.getBytesRef(); ts.reset(); while (ts.incrementToken()) { - termAtt.fillBytesRef(); - SpanTermQuery stq = new SpanTermQuery(new Term(fieldName, BytesRef.deepCopyOf(bytes))); + SpanTermQuery stq = new SpanTermQuery(new Term(fieldName, BytesRef.deepCopyOf(termAtt.getBytesRef()))); clausesList.add(stq); } ts.end(); diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/xml/builders/TermsQueryBuilder.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/xml/builders/TermsQueryBuilder.java index a196c4b8217..bb95bc7446e 100644 --- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/xml/builders/TermsQueryBuilder.java +++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/xml/builders/TermsQueryBuilder.java @@ -55,11 +55,9 @@ public class TermsQueryBuilder implements QueryBuilder { try (TokenStream ts = analyzer.tokenStream(fieldName, text)) { TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class); Term term = null; - BytesRef bytes = termAtt.getBytesRef(); ts.reset(); while (ts.incrementToken()) { - termAtt.fillBytesRef(); - term = new Term(fieldName, BytesRef.deepCopyOf(bytes)); + term = new Term(fieldName, 
BytesRef.deepCopyOf(termAtt.getBytesRef())); bq.add(new BooleanClause(new TermQuery(term), BooleanClause.Occur.SHOULD)); } ts.end(); diff --git a/lucene/sandbox/src/java/org/apache/lucene/search/TokenStreamToTermAutomatonQuery.java b/lucene/sandbox/src/java/org/apache/lucene/search/TokenStreamToTermAutomatonQuery.java index 406ee32b146..e8f838ba901 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/search/TokenStreamToTermAutomatonQuery.java +++ b/lucene/sandbox/src/java/org/apache/lucene/search/TokenStreamToTermAutomatonQuery.java @@ -61,8 +61,6 @@ public class TokenStreamToTermAutomatonQuery { final PositionLengthAttribute posLengthAtt = in.addAttribute(PositionLengthAttribute.class); final OffsetAttribute offsetAtt = in.addAttribute(OffsetAttribute.class); - final BytesRef term = termBytesAtt.getBytesRef(); - in.reset(); TermAutomatonQuery query = new TermAutomatonQuery(field); @@ -93,7 +91,7 @@ public class TokenStreamToTermAutomatonQuery { state = query.createState(); } - termBytesAtt.fillBytesRef(); + BytesRef term = termBytesAtt.getBytesRef(); //System.out.println(pos + "-" + endPos + ": " + term.utf8ToString() + ": posInc=" + posInc); if (term.length == 1 && term.bytes[term.offset] == (byte) '*') { query.addAnyTransition(pos, endPos); diff --git a/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/BytesRefIteratorTokenStream.java b/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/BytesRefIteratorTokenStream.java index 8ca8e1e7f6a..d82d2f10b9d 100644 --- a/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/BytesRefIteratorTokenStream.java +++ b/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/BytesRefIteratorTokenStream.java @@ -20,11 +20,7 @@ package org.apache.lucene.spatial.prefix; import java.io.IOException; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; -import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute; -import 
org.apache.lucene.util.Attribute; -import org.apache.lucene.util.AttributeFactory; -import org.apache.lucene.util.AttributeImpl; +import org.apache.lucene.analysis.tokenattributes.BytesTermAttribute; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefIterator; @@ -37,85 +33,6 @@ import org.apache.lucene.util.BytesRefIterator; */ class BytesRefIteratorTokenStream extends TokenStream { - // just a wrapper to prevent adding CharTermAttribute - private static final class BRAttributeFactory extends AttributeFactory { - private final AttributeFactory delegate; - - BRAttributeFactory(AttributeFactory delegate) { - this.delegate = delegate; - } - - @Override - public AttributeImpl createAttributeInstance(Class attClass) { - if (CharTermAttribute.class.isAssignableFrom(attClass)) - throw new IllegalArgumentException(getClass() + " does not support CharTermAttribute."); - return delegate.createAttributeInstance(attClass); - } - } - - private static final class BRTermToBytesRefAttributeImpl extends AttributeImpl - implements TermToBytesRefAttribute { - private final BytesRef bytes = new BytesRef(); - - void setBytesRef(BytesRef inputBytes) { - // shallow clone. this.bytesRef is final - bytes.bytes = inputBytes.bytes; - bytes.offset = inputBytes.offset; - bytes.length = inputBytes.length; - } - - @Override - public void clear() { - // we keep it untouched as it's fully controlled by the outer class. - } - - @Override - public void copyTo(AttributeImpl target) { - final BRTermToBytesRefAttributeImpl a = (BRTermToBytesRefAttributeImpl) target; - a.setBytesRef(BytesRef.deepCopyOf(bytes)); - } - - @Override - public void fillBytesRef() { - //nothing to do; it's populated by incrementToken - } - - @Override - public BytesRef getBytesRef() { - return bytes; - } - - @Override - public BRTermToBytesRefAttributeImpl clone() { - // super.clone won't work since we need a new BytesRef reference and it's nice to have it final. 
The superclass - // has no state to copy anyway. - final BRTermToBytesRefAttributeImpl clone = new BRTermToBytesRefAttributeImpl(); - clone.setBytesRef(BytesRef.deepCopyOf(bytes)); - return clone; - } - - @Override - public int hashCode() { - return bytes.hashCode(); - } - - @Override - public boolean equals(Object obj) { - if (this == obj) return true; - if (obj == null) return false; - if (getClass() != obj.getClass()) return false; - BRTermToBytesRefAttributeImpl other = (BRTermToBytesRefAttributeImpl) obj; - if (!bytes.equals(other.bytes)) return false; - return true; - } - } - - public BytesRefIteratorTokenStream() { - super(new BRAttributeFactory(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY)); - addAttributeImpl(new BRTermToBytesRefAttributeImpl());//because non-public constructor - bytesAtt = (BRTermToBytesRefAttributeImpl) addAttribute(TermToBytesRefAttribute.class); - } - public BytesRefIterator getBytesRefIterator() { return bytesIter; } @@ -129,7 +46,6 @@ class BytesRefIteratorTokenStream extends TokenStream { public void reset() throws IOException { if (bytesIter == null) throw new IllegalStateException("call setBytesRefIterator() before usage"); - bytesAtt.getBytesRef().length = 0; } @Override @@ -137,14 +53,12 @@ class BytesRefIteratorTokenStream extends TokenStream { if (bytesIter == null) throw new IllegalStateException("call setBytesRefIterator() before usage"); - // this will only clear all other attributes in this TokenStream - clearAttributes();//TODO but there should be no "other" attributes - // get next BytesRef bytes = bytesIter.next(); if (bytes == null) { return false; } else { + clearAttributes(); bytesAtt.setBytesRef(bytes); //note: we don't bother setting posInc or type attributes. There's no point to it. 
return true; @@ -152,7 +66,7 @@ class BytesRefIteratorTokenStream extends TokenStream { } //members - private final BRTermToBytesRefAttributeImpl bytesAtt; + private final BytesTermAttribute bytesAtt = addAttribute(BytesTermAttribute.class); private BytesRefIterator bytesIter = null; // null means not initialized diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FreeTextSuggester.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FreeTextSuggester.java index 2bbdd465233..37c06a7ed65 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FreeTextSuggester.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FreeTextSuggester.java @@ -486,11 +486,10 @@ public class FreeTextSuggester extends Lookup implements Accountable { // Run full analysis, but save only the // last 1gram, last 2gram, etc.: - BytesRef tokenBytes = termBytesAtt.getBytesRef(); int maxEndOffset = -1; boolean sawRealToken = false; while(ts.incrementToken()) { - termBytesAtt.fillBytesRef(); + BytesRef tokenBytes = termBytesAtt.getBytesRef(); sawRealToken |= tokenBytes.length > 0; // TODO: this is somewhat iffy; today, ShingleFilter // sets posLen to the gram count; maybe we should make diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/CompletionTokenStream.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/CompletionTokenStream.java index d5adf68f50d..f4150a8833f 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/CompletionTokenStream.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/CompletionTokenStream.java @@ -58,7 +58,7 @@ public final class CompletionTokenStream extends TokenStream { private final PayloadAttribute payloadAttr = addAttribute(PayloadAttribute.class); private final PositionIncrementAttribute posAttr = addAttribute(PositionIncrementAttribute.class); - private final 
ByteTermAttribute bytesAtt = addAttribute(ByteTermAttribute.class); + private final BytesRefBuilderTermAttribute bytesAtt = addAttribute(BytesRefBuilderTermAttribute.class); private final TokenStream input; final boolean preserveSep; @@ -309,9 +309,7 @@ public final class CompletionTokenStream extends TokenStream { /** * Attribute providing access to the term builder and UTF-16 conversion */ - private interface ByteTermAttribute extends TermToBytesRefAttribute { - // marker interface - + private interface BytesRefBuilderTermAttribute extends TermToBytesRefAttribute { /** * Returns the builder from which the term is derived. */ @@ -326,20 +324,15 @@ public final class CompletionTokenStream extends TokenStream { /** * Custom attribute implementation for completion token stream */ - public static final class ByteTermAttributeImpl extends AttributeImpl implements ByteTermAttribute, TermToBytesRefAttribute { + public static final class BytesRefBuilderTermAttributeImpl extends AttributeImpl implements BytesRefBuilderTermAttribute, TermToBytesRefAttribute { private final BytesRefBuilder bytes = new BytesRefBuilder(); - private CharsRefBuilder charsRef; + private transient CharsRefBuilder charsRef; /** * Sole constructor * no-op */ - public ByteTermAttributeImpl() { - } - - @Override - public void fillBytesRef() { - // does nothing - we change in place + public BytesRefBuilderTermAttributeImpl() { } @Override @@ -359,10 +352,17 @@ public final class CompletionTokenStream extends TokenStream { @Override public void copyTo(AttributeImpl target) { - ByteTermAttributeImpl other = (ByteTermAttributeImpl) target; + BytesRefBuilderTermAttributeImpl other = (BytesRefBuilderTermAttributeImpl) target; other.bytes.copyBytes(bytes); } + @Override + public AttributeImpl clone() { + BytesRefBuilderTermAttributeImpl other = new BytesRefBuilderTermAttributeImpl(); + copyTo(other); + return other; + } + @Override public CharSequence toUTF16() { if (charsRef == null) { diff --git 
a/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggesterTest.java b/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggesterTest.java index 2b487bf820a..59405112fa0 100644 --- a/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggesterTest.java +++ b/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggesterTest.java @@ -51,7 +51,6 @@ import org.apache.lucene.document.Document; import org.apache.lucene.search.suggest.Lookup.LookupResult; import org.apache.lucene.search.suggest.Input; import org.apache.lucene.search.suggest.InputArrayIterator; -import org.apache.lucene.util.AttributeFactory; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.LineFileDocs; diff --git a/lucene/test-framework/src/java/org/apache/lucene/analysis/CannedBinaryTokenStream.java b/lucene/test-framework/src/java/org/apache/lucene/analysis/CannedBinaryTokenStream.java index 00231f7c51a..d11cc35e35e 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/analysis/CannedBinaryTokenStream.java +++ b/lucene/test-framework/src/java/org/apache/lucene/analysis/CannedBinaryTokenStream.java @@ -17,13 +17,11 @@ package org.apache.lucene.analysis; * limitations under the License. 
*/ +import org.apache.lucene.analysis.tokenattributes.BytesTermAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute; -import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute; -import org.apache.lucene.util.AttributeImpl; import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.BytesRefBuilder; /** * TokenStream from a canned list of binary (BytesRef-based) @@ -54,65 +52,11 @@ public final class CannedBinaryTokenStream extends TokenStream { private final BinaryToken[] tokens; private int upto = 0; - private final BinaryTermAttribute termAtt = addAttribute(BinaryTermAttribute.class); + private final BytesTermAttribute termAtt = addAttribute(BytesTermAttribute.class); private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class); private final PositionLengthAttribute posLengthAtt = addAttribute(PositionLengthAttribute.class); private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); - /** An attribute extending {@link - * TermToBytesRefAttribute} but exposing {@link - * #setBytesRef} method. */ - public interface BinaryTermAttribute extends TermToBytesRefAttribute { - - /** Set the current binary value. */ - public void setBytesRef(BytesRef bytes); - } - - /** Implementation for {@link BinaryTermAttribute}. 
*/ - public final static class BinaryTermAttributeImpl extends AttributeImpl implements BinaryTermAttribute, TermToBytesRefAttribute { - private final BytesRefBuilder bytes = new BytesRefBuilder(); - - @Override - public void fillBytesRef() { - bytes.get(); // sets the length on the bytesref - } - - @Override - public BytesRef getBytesRef() { - return bytes.get(); - } - - @Override - public void setBytesRef(BytesRef bytes) { - this.bytes.copyBytes(bytes); - } - - @Override - public void clear() { - } - - @Override - public boolean equals(Object other) { - return other == this; - } - - @Override - public int hashCode() { - return System.identityHashCode(this); - } - - @Override - public void copyTo(AttributeImpl target) { - BinaryTermAttributeImpl other = (BinaryTermAttributeImpl) target; - other.bytes.copyBytes(bytes); - } - - @Override - public BinaryTermAttributeImpl clone() { - throw new UnsupportedOperationException(); - } - } - public CannedBinaryTokenStream(BinaryToken... tokens) { super(); this.tokens = tokens; diff --git a/lucene/test-framework/src/java/org/apache/lucene/analysis/CollationTestBase.java b/lucene/test-framework/src/java/org/apache/lucene/analysis/CollationTestBase.java index ea524d98e81..5911e4d8519 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/analysis/CollationTestBase.java +++ b/lucene/test-framework/src/java/org/apache/lucene/analysis/CollationTestBase.java @@ -183,12 +183,10 @@ public abstract class CollationTestBase extends LuceneTestCase { String term = TestUtil.randomSimpleString(random()); try (TokenStream ts = analyzer.tokenStream("fake", term)) { TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class); - BytesRef bytes = termAtt.getBytesRef(); ts.reset(); assertTrue(ts.incrementToken()); - termAtt.fillBytesRef(); // ensure we make a copy of the actual bytes too - map.put(term, BytesRef.deepCopyOf(bytes)); + map.put(term, BytesRef.deepCopyOf(termAtt.getBytesRef())); 
assertFalse(ts.incrementToken()); ts.end(); } @@ -205,11 +203,9 @@ public abstract class CollationTestBase extends LuceneTestCase { BytesRef expected = mapping.getValue(); try (TokenStream ts = analyzer.tokenStream("fake", term)) { TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class); - BytesRef bytes = termAtt.getBytesRef(); ts.reset(); assertTrue(ts.incrementToken()); - termAtt.fillBytesRef(); - assertEquals(expected, bytes); + assertEquals(expected, termAtt.getBytesRef()); assertFalse(ts.incrementToken()); ts.end(); } diff --git a/lucene/test-framework/src/java/org/apache/lucene/analysis/MockUTF16TermAttributeImpl.java b/lucene/test-framework/src/java/org/apache/lucene/analysis/MockUTF16TermAttributeImpl.java index 542403abb99..aa5dbac01c0 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/analysis/MockUTF16TermAttributeImpl.java +++ b/lucene/test-framework/src/java/org/apache/lucene/analysis/MockUTF16TermAttributeImpl.java @@ -34,11 +34,11 @@ public class MockUTF16TermAttributeImpl extends CharTermAttributeImpl { AttributeFactory.getStaticImplementation(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, MockUTF16TermAttributeImpl.class); @Override - public void fillBytesRef() { - BytesRef bytes = getBytesRef(); - byte[] utf16 = toString().getBytes(StandardCharsets.UTF_16LE); - bytes.bytes = utf16; - bytes.offset = 0; - bytes.length = utf16.length; + public BytesRef getBytesRef() { + final BytesRef ref = this.builder.get(); + ref.bytes = toString().getBytes(StandardCharsets.UTF_16LE); + ref.offset = 0; + ref.length = ref.bytes.length; + return ref; } } diff --git a/solr/contrib/analysis-extras/src/java/org/apache/solr/schema/ICUCollationField.java b/solr/contrib/analysis-extras/src/java/org/apache/solr/schema/ICUCollationField.java index d167251b329..ce943f49e7c 100644 --- a/solr/contrib/analysis-extras/src/java/org/apache/solr/schema/ICUCollationField.java +++ 
b/solr/contrib/analysis-extras/src/java/org/apache/solr/schema/ICUCollationField.java @@ -252,16 +252,16 @@ public class ICUCollationField extends FieldType { source.reset(); TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class); - BytesRef bytes = termAtt.getBytesRef(); + // we control the analyzer here: most errors are impossible if (!source.incrementToken()) throw new IllegalArgumentException("analyzer returned no terms for text: " + text); - termAtt.fillBytesRef(); + BytesRef bytes = BytesRef.deepCopyOf(termAtt.getBytesRef()); assert !source.incrementToken(); source.end(); - return BytesRef.deepCopyOf(bytes); + return bytes; } catch (IOException e) { throw new RuntimeException("Unable to analyze text: " + text, e); } diff --git a/solr/core/src/java/org/apache/solr/handler/AnalysisRequestHandlerBase.java b/solr/core/src/java/org/apache/solr/handler/AnalysisRequestHandlerBase.java index 624a31e8298..f9aa75286df 100644 --- a/solr/core/src/java/org/apache/solr/handler/AnalysisRequestHandlerBase.java +++ b/solr/core/src/java/org/apache/solr/handler/AnalysisRequestHandlerBase.java @@ -148,13 +148,11 @@ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase { try (TokenStream tokenStream = analyzer.tokenStream("", query)){ final Set tokens = new HashSet<>(); final TermToBytesRefAttribute bytesAtt = tokenStream.getAttribute(TermToBytesRefAttribute.class); - final BytesRef bytes = bytesAtt.getBytesRef(); tokenStream.reset(); while (tokenStream.incrementToken()) { - bytesAtt.fillBytesRef(); - tokens.add(BytesRef.deepCopyOf(bytes)); + tokens.add(BytesRef.deepCopyOf(bytesAtt.getBytesRef())); } tokenStream.end(); @@ -246,7 +244,6 @@ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase { final NamedList tokenNamedList = new SimpleOrderedMap<>(); final TermToBytesRefAttribute termAtt = token.getAttribute(TermToBytesRefAttribute.class); BytesRef rawBytes = termAtt.getBytesRef(); - 
termAtt.fillBytesRef(); final String text = fieldType.indexedToReadable(rawBytes, new CharsRefBuilder()).toString(); tokenNamedList.add("text", text); diff --git a/solr/core/src/java/org/apache/solr/schema/CollationField.java b/solr/core/src/java/org/apache/solr/schema/CollationField.java index fc0216e61ae..9e2505337bf 100644 --- a/solr/core/src/java/org/apache/solr/schema/CollationField.java +++ b/solr/core/src/java/org/apache/solr/schema/CollationField.java @@ -224,16 +224,14 @@ public class CollationField extends FieldType { try (TokenStream source = analyzer.tokenStream(field, text)) { source.reset(); TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class); - BytesRef bytes = termAtt.getBytesRef(); // we control the analyzer here: most errors are impossible if (!source.incrementToken()) throw new IllegalArgumentException("analyzer returned no terms for text: " + text); - termAtt.fillBytesRef(); + BytesRef bytes = BytesRef.deepCopyOf(termAtt.getBytesRef()); assert !source.incrementToken(); - source.end(); - return BytesRef.deepCopyOf(bytes); + return bytes; } catch (IOException e) { throw new RuntimeException("Unable to analyze text: " + text, e); } diff --git a/solr/core/src/java/org/apache/solr/schema/TextField.java b/solr/core/src/java/org/apache/solr/schema/TextField.java index 28a170f4e9f..9fbc4d31a9d 100644 --- a/solr/core/src/java/org/apache/solr/schema/TextField.java +++ b/solr/core/src/java/org/apache/solr/schema/TextField.java @@ -146,16 +146,15 @@ public class TextField extends FieldType { source.reset(); TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class); - BytesRef bytes = termAtt.getBytesRef(); if (!source.incrementToken()) throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,"analyzer returned no terms for multiTerm term: " + part); - termAtt.fillBytesRef(); + BytesRef bytes = BytesRef.deepCopyOf(termAtt.getBytesRef()); if (source.incrementToken()) throw new 
SolrException(SolrException.ErrorCode.BAD_REQUEST,"analyzer returned too many terms for multiTerm term: " + part); source.end(); - return BytesRef.deepCopyOf(bytes); + return bytes; } catch (IOException e) { throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,"error analyzing range part: " + part, e); } diff --git a/solr/core/src/test/org/apache/solr/search/CursorMarkTest.java b/solr/core/src/test/org/apache/solr/search/CursorMarkTest.java index a2c89bdf669..58c3fafc0d0 100644 --- a/solr/core/src/test/org/apache/solr/search/CursorMarkTest.java +++ b/solr/core/src/test/org/apache/solr/search/CursorMarkTest.java @@ -267,10 +267,9 @@ public class CursorMarkTest extends SolrTestCaseJ4 { String term = TestUtil.randomRealisticUnicodeString(random()); try (TokenStream ts = analyzer.tokenStream("fake", term)) { TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class); - val = termAtt.getBytesRef(); ts.reset(); assertTrue(ts.incrementToken()); - termAtt.fillBytesRef(); + val = BytesRef.deepCopyOf(termAtt.getBytesRef()); assertFalse(ts.incrementToken()); ts.end(); }