mirror of https://github.com/apache/lucene.git

LUCENE-6653, LUCENE-6652: Refactor TermToBytesRefAttribute; add oal.analysis.tokenattributes.BytesTermAttribute; remove code duplication in tests

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1688830 13f79535-47bb-0310-9956-ffa450edef68

parent 96a0dc55d5
commit 1816ed1945
@@ -116,6 +116,10 @@ New Features
 * LUCENE-6632: Geo3D: Compute circle planes more accurately.
   (Karl Wright via David Smiley)
 
+* LUCENE-6653: Added general purpose BytesTermAttribute to basic token
+  attributes package that can be used for TokenStreams that solely produce
+  binary terms. (Uwe Schindler)
+
 API Changes
 
 * LUCENE-6508: Simplify Lock api, there is now just
@@ -295,6 +299,9 @@ Test Framework
   environments (e.g., read-only dirs). If tests are running without a security
   manager, an assume cancels test execution automatically. (Uwe Schindler)
 
+* LUCENE-6652: Removed lots of useless Byte(s)TermAttributes all over test
+  infrastructure. (Uwe Schindler)
+
 Changes in Backwards Compatibility Policy
 
 * LUCENE-6553: The iterator returned by the LeafReader.postings method now
@@ -305,6 +312,11 @@ Changes in Backwards Compatibility Policy
   DiversifiedTopDocsCollector can be used instead with a maximum number of hits
   per key equal to 1. (Adrien Grand)
 
+* LUCENE-6653: The workflow for consuming the TermToBytesRefAttribute was changed:
+  getBytesRef() now does all work and is called on each token, fillBytesRef()
+  was removed. The implementation is free to reuse the internal BytesRef
+  or return a new one on each call. (Uwe Schindler)
+
 ======================= Lucene 5.2.1 =======================
 
 Bug Fixes
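The last entry above is the heart of this commit; the rest of the diff is the mechanical migration. A minimal consumer-side sketch of the old versus new workflow, assuming the Lucene analysis API as of this revision (the field name and the printing are illustrative only):

```java
import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
import org.apache.lucene.util.BytesRef;

public class ConsumeTerms {
  // Before: BytesRef bytes = termAtt.getBytesRef() once up-front, then
  //         termAtt.fillBytesRef() inside the loop to refresh 'bytes'.
  // After:  termAtt.getBytesRef() is called per token and does all the work.
  static void printTerms(Analyzer analyzer, String text) throws IOException {
    try (TokenStream ts = analyzer.tokenStream("body", text)) {
      TermToBytesRefAttribute termAtt = ts.getAttribute(TermToBytesRefAttribute.class);
      ts.reset();
      while (ts.incrementToken()) {
        // The returned instance may be reused or fresh on each call;
        // copy it if it must outlive the next incrementToken().
        BytesRef bytes = termAtt.getBytesRef();
        System.out.println(BytesRef.deepCopyOf(bytes).utf8ToString());
      }
      ts.end();
    }
  }
}
```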
@@ -40,11 +40,12 @@ public class CollatedTermAttributeImpl extends CharTermAttributeImpl {
   }
 
   @Override
-  public void fillBytesRef() {
-    BytesRef bytes = getBytesRef();
-    bytes.bytes = collator.getCollationKey(toString()).toByteArray();
-    bytes.offset = 0;
-    bytes.length = bytes.bytes.length;
+  public BytesRef getBytesRef() {
+    final BytesRef ref = this.builder.get();
+    ref.bytes = collator.getCollationKey(toString()).toByteArray();
+    ref.offset = 0;
+    ref.length = ref.bytes.length;
+    return ref;
   }
 
 }
@@ -45,11 +45,12 @@ public class ICUCollatedTermAttributeImpl extends CharTermAttributeImpl {
   }
 
   @Override
-  public void fillBytesRef() {
-    BytesRef bytes = getBytesRef();
+  public BytesRef getBytesRef() {
     collator.getRawCollationKey(toString(), key);
-    bytes.bytes = key.bytes;
-    bytes.offset = 0;
-    bytes.length = key.size;
+    final BytesRef ref = this.builder.get();
+    ref.bytes = key.bytes;
+    ref.offset = 0;
+    ref.length = key.size;
+    return ref;
   }
 }
@@ -96,9 +96,4 @@ public class MorphosyntacticTagsAttributeImpl extends AttributeImpl
     this.copyTo(cloned);
     return cloned;
   }
-
-  @Override
-  public String toString() {
-    return tags == null ? "<no tags>" : tags.toString();
-  }
 }
@@ -87,7 +87,7 @@ public class ReadTokensTask extends PerfTask {
 
     TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
     while(stream.incrementToken()) {
-      termAtt.fillBytesRef();
+      termAtt.getBytesRef();
       tokenCount++;
     }
     stream.end();
@@ -976,8 +976,6 @@ public class TestPerfTasksLogic extends BenchmarkTestCase {
       assertTrue(ts2.incrementToken());
       BytesRef bytes1 = termAtt1.getBytesRef();
       BytesRef bytes2 = termAtt2.getBytesRef();
-      termAtt1.fillBytesRef();
-      termAtt2.fillBytesRef();
       assertEquals(bytes1, bytes2);
       assertFalse(ts1.incrementToken());
       assertFalse(ts2.incrementToken());
@@ -29,8 +29,6 @@ import java.util.Map;
 import java.util.Set;
 
 import org.apache.lucene.analysis.MockAnalyzer;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
 import org.apache.lucene.codecs.Codec;
 import org.apache.lucene.document.BinaryDocValuesField;
 import org.apache.lucene.document.Document;
@@ -58,7 +56,6 @@ import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.PrefixQuery;
 import org.apache.lucene.search.TermRangeQuery;
 import org.apache.lucene.store.Directory;
-import org.apache.lucene.util.AttributeImpl;
 import org.apache.lucene.util.AttributeSource;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.FixedBitSet;
@@ -234,7 +231,7 @@ public class TestAutoPrefixTerms extends LuceneTestCase {
 
     for(Integer term : terms) {
       Document doc = new Document();
-      doc.add(new BinaryField("field", intToBytes(term)));
+      doc.add(newStringField("field", intToBytes(term), Field.Store.NO));
       doc.add(new NumericDocValuesField("field", term));
       w.addDocument(doc);
     }
@@ -506,78 +503,6 @@ public class TestAutoPrefixTerms extends LuceneTestCase {
     dir.close();
   }
 
-  static final class BinaryTokenStream extends TokenStream {
-    private final ByteTermAttribute bytesAtt = addAttribute(ByteTermAttribute.class);
-    private boolean available = true;
-
-    public BinaryTokenStream(BytesRef bytes) {
-      bytesAtt.setBytesRef(bytes);
-    }
-
-    @Override
-    public boolean incrementToken() {
-      if (available) {
-        clearAttributes();
-        available = false;
-        return true;
-      }
-      return false;
-    }
-
-    @Override
-    public void reset() {
-      available = true;
-    }
-
-    public interface ByteTermAttribute extends TermToBytesRefAttribute {
-      void setBytesRef(BytesRef bytes);
-    }
-
-    public static class ByteTermAttributeImpl extends AttributeImpl implements ByteTermAttribute,TermToBytesRefAttribute {
-      private BytesRef bytes;
-
-      @Override
-      public void fillBytesRef() {
-        // no-op: the bytes was already filled by our owner's incrementToken
-      }
-
-      @Override
-      public BytesRef getBytesRef() {
-        return bytes;
-      }
-
-      @Override
-      public void setBytesRef(BytesRef bytes) {
-        this.bytes = bytes;
-      }
-
-      @Override
-      public void clear() {}
-
-      @Override
-      public void copyTo(AttributeImpl target) {
-        ByteTermAttributeImpl other = (ByteTermAttributeImpl) target;
-        other.bytes = bytes;
-      }
-    }
-  }
-
-  /** Basically a StringField that accepts binary term. */
-  private static class BinaryField extends Field {
-
-    final static FieldType TYPE;
-    static {
-      TYPE = new FieldType(StringField.TYPE_NOT_STORED);
-      // Necessary so our custom tokenStream is used by Field.tokenStream:
-      TYPE.setTokenized(true);
-      TYPE.freeze();
-    }
-
-    public BinaryField(String name, BytesRef value) {
-      super(name, new BinaryTokenStream(value), TYPE);
-    }
-  }
-
   /** Helper class to ensure auto-prefix terms 1) never overlap one another, and 2) are used when they should be. */
   private static class VerifyAutoPrefixTerms {
     final FixedBitSet allHits;
@@ -158,17 +158,13 @@ public final class NumericTokenStream extends TokenStream {
 
     @Override
     public BytesRef getBytesRef() {
-      return bytes.get();
-    }
-
-    @Override
-    public void fillBytesRef() {
       assert valueSize == 64 || valueSize == 32;
       if (valueSize == 64) {
        NumericUtils.longToPrefixCoded(value, shift, bytes);
      } else {
        NumericUtils.intToPrefixCoded((int) value, shift, bytes);
      }
+      return bytes.get();
    }
 
    @Override
@@ -201,8 +197,7 @@ public final class NumericTokenStream extends TokenStream {
 
     @Override
     public void reflectWith(AttributeReflector reflector) {
-      fillBytesRef();
-      reflector.reflect(TermToBytesRefAttribute.class, "bytes", bytes.toBytesRef());
+      reflector.reflect(TermToBytesRefAttribute.class, "bytes", getBytesRef());
       reflector.reflect(NumericTermAttribute.class, "shift", shift);
       reflector.reflect(NumericTermAttribute.class, "rawValue", getRawValue());
       reflector.reflect(NumericTermAttribute.class, "valueSize", valueSize);
@@ -219,7 +214,7 @@ public final class NumericTokenStream extends TokenStream {
       NumericTermAttributeImpl t = (NumericTermAttributeImpl)super.clone();
       // Do a deep clone
       t.bytes = new BytesRefBuilder();
-      t.bytes.copyBytes(bytes.get());
+      t.bytes.copyBytes(getBytesRef());
       return t;
     }
 
@@ -105,8 +105,6 @@ public class TokenStreamToAutomaton {
     final PositionLengthAttribute posLengthAtt = in.addAttribute(PositionLengthAttribute.class);
     final OffsetAttribute offsetAtt = in.addAttribute(OffsetAttribute.class);
 
-    final BytesRef term = termBytesAtt.getBytesRef();
-
     in.reset();
 
     // Only temporarily holds states ahead of our current
@@ -157,8 +155,7 @@ public class TokenStreamToAutomaton {
 
       final int endPos = pos + posLengthAtt.getPositionLength();
 
-      termBytesAtt.fillBytesRef();
-      final BytesRef termUTF8 = changeToken(term);
+      final BytesRef termUTF8 = changeToken(termBytesAtt.getBytesRef());
       int[] termUnicode = null;
       final Position endPosData = positions.get(endPos);
       if (endPosData.arriving == -1) {
@@ -0,0 +1,31 @@
+package org.apache.lucene.analysis.tokenattributes;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.util.BytesRef;
+
+/**
+ * This attribute can be used if you have the raw term bytes to be indexed.
+ * It can be used as replacement for {@link CharTermAttribute}, if binary
+ * terms should be indexed.
+ * @lucene.internal
+ */
+public interface BytesTermAttribute extends TermToBytesRefAttribute {
+  /** Sets the {@link BytesRef} of the term */
+  public void setBytesRef(BytesRef bytes);
+}
@@ -0,0 +1,65 @@
+package org.apache.lucene.analysis.tokenattributes;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.util.AttributeImpl;
+import org.apache.lucene.util.AttributeReflector;
+import org.apache.lucene.util.BytesRef;
+
+/** Implementation class for {@link BytesTermAttribute}.
+ * @lucene.internal
+ */
+public class BytesTermAttributeImpl extends AttributeImpl implements BytesTermAttribute, TermToBytesRefAttribute {
+  private BytesRef bytes;
+
+  /** Initialize this attribute with no bytes. */
+  public BytesTermAttributeImpl() {}
+
+  @Override
+  public BytesRef getBytesRef() {
+    return bytes;
+  }
+
+  @Override
+  public void setBytesRef(BytesRef bytes) {
+    this.bytes = bytes;
+  }
+
+  @Override
+  public void clear() {
+    this.bytes = null;
+  }
+
+  @Override
+  public void copyTo(AttributeImpl target) {
+    BytesTermAttributeImpl other = (BytesTermAttributeImpl) target;
+    other.bytes = BytesRef.deepCopyOf(bytes);
+  }
+
+  @Override
+  public AttributeImpl clone() {
+    BytesTermAttributeImpl c = (BytesTermAttributeImpl)super.clone();
+    copyTo(c);
+    return c;
+  }
+
+  @Override
+  public void reflectWith(AttributeReflector reflector) {
+    reflector.reflect(TermToBytesRefAttribute.class, "bytes", bytes);
+  }
+}
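One detail in the implementation above is easy to miss: setBytesRef(...) keeps the caller's reference, while copyTo(...) (and therefore clone()) deep-copies it, so captured attribute state does not alias the producer's buffer. A small sketch of the difference, using only the classes added by this commit:

```java
import org.apache.lucene.analysis.tokenattributes.BytesTermAttributeImpl;
import org.apache.lucene.util.BytesRef;

public class CopySemantics {
  public static void main(String[] args) {
    BytesTermAttributeImpl src = new BytesTermAttributeImpl();
    byte[] buffer = new byte[] { 1, 2, 3 };
    src.setBytesRef(new BytesRef(buffer));  // stores the reference, no copy

    BytesTermAttributeImpl snapshot = new BytesTermAttributeImpl();
    src.copyTo(snapshot);                   // deep copy via BytesRef.deepCopyOf

    buffer[0] = 42;                         // mutate the producer's buffer
    System.out.println(src.getBytesRef().bytes[0]);       // 42: aliases buffer
    System.out.println(snapshot.getBytesRef().bytes[0]);  // 1: unaffected
  }
}
```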
@@ -33,6 +33,9 @@ public class CharTermAttributeImpl extends AttributeImpl implements CharTermAttr
   private char[] termBuffer = new char[ArrayUtil.oversize(MIN_BUFFER_SIZE, RamUsageEstimator.NUM_BYTES_CHAR)];
   private int termLength = 0;
 
+  /** May be used by subclasses to convert to different charsets / encodings for implementing {@link #getBytesRef()}. */
+  protected BytesRefBuilder builder = new BytesRefBuilder();
+
   /** Initialize this attribute with empty term text */
   public CharTermAttributeImpl() {}
 
@@ -83,16 +86,11 @@ public class CharTermAttributeImpl extends AttributeImpl implements CharTermAttr
   }
 
   // *** TermToBytesRefAttribute interface ***
-  private BytesRefBuilder bytes = new BytesRefBuilder();
-
-  @Override
-  public void fillBytesRef() {
-    bytes.copyChars(termBuffer, 0, termLength);
-  }
 
   @Override
   public BytesRef getBytesRef() {
-    return bytes.get();
+    builder.copyChars(termBuffer, 0, termLength);
+    return builder.get();
   }
 
   // *** CharSequence interface ***
@@ -228,8 +226,8 @@ public class CharTermAttributeImpl extends AttributeImpl implements CharTermAttr
     // Do a deep clone
     t.termBuffer = new char[this.termLength];
     System.arraycopy(this.termBuffer, 0, t.termBuffer, 0, this.termLength);
-    t.bytes = new BytesRefBuilder();
-    t.bytes.copyBytes(bytes.get());
+    t.builder = new BytesRefBuilder();
+    t.builder.copyBytes(builder.get());
     return t;
   }
 
@@ -271,8 +269,7 @@ public class CharTermAttributeImpl extends AttributeImpl implements CharTermAttr
   @Override
   public void reflectWith(AttributeReflector reflector) {
     reflector.reflect(CharTermAttribute.class, "term", toString());
-    fillBytesRef();
-    reflector.reflect(TermToBytesRefAttribute.class, "bytes", bytes.toBytesRef());
+    reflector.reflect(TermToBytesRefAttribute.class, "bytes", getBytesRef());
   }
 
   @Override
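The new protected builder field is what lets subclasses re-encode the term inside getBytesRef(); the collation attributes earlier in this diff and MockUTF16TermAttributeImpl later both use it. A minimal subclass sketch, assuming the post-commit CharTermAttributeImpl (UTF-16BE is an arbitrary example encoding):

```java
import java.nio.charset.StandardCharsets;

import org.apache.lucene.analysis.tokenattributes.CharTermAttributeImpl;
import org.apache.lucene.util.BytesRef;

// Encodes the term as UTF-16BE instead of the default UTF-8.
public class UTF16BETermAttributeImpl extends CharTermAttributeImpl {
  @Override
  public BytesRef getBytesRef() {
    final BytesRef ref = this.builder.get();
    ref.bytes = toString().getBytes(StandardCharsets.UTF_16BE);
    ref.offset = 0;
    ref.length = ref.bytes.length;
    return ref;
  }
}
```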
@@ -24,17 +24,12 @@ import org.apache.lucene.util.BytesRef;
  * This attribute is requested by TermsHashPerField to index the contents.
  * This attribute can be used to customize the final byte[] encoding of terms.
  * <p>
- * Consumers of this attribute call {@link #getBytesRef()} up-front, and then
- * invoke {@link #fillBytesRef()} for each term. Example:
+ * Consumers of this attribute call {@link #getBytesRef()} for each term. Example:
  * <pre class="prettyprint">
  *   final TermToBytesRefAttribute termAtt = tokenStream.getAttribute(TermToBytesRefAttribute.class);
- *   final BytesRef bytes = termAtt.getBytesRef();
- *
  *   while (tokenStream.incrementToken() {
  *
- *     // you must call termAtt.fillBytesRef() before doing something with the bytes.
- *     // this encodes the term value (internally it might be a char[], etc) into the bytes.
- *     int hashCode = termAtt.fillBytesRef();
+ *     final BytesRef bytes = termAtt.getBytesRef();
  *
  *     if (isInteresting(bytes)) {
  *
@@ -42,27 +37,21 @@ import org.apache.lucene.util.BytesRef;
  *       // you should make a copy if you need persistent access to the bytes, otherwise they will
  *       // be rewritten across calls to incrementToken()
  *
-*       doSomethingWith(new BytesRef(bytes));
+*       doSomethingWith(BytesRef.deepCopyOf(bytes));
  *     }
  *   }
  *   ...
  * </pre>
- * @lucene.experimental This is a very expert API, please use
- * {@link CharTermAttributeImpl} and its implementation of this method
- * for UTF-8 terms.
+ * @lucene.internal This is a very expert and internal API, please use
+ * {@link CharTermAttribute} and its implementation for UTF-8 terms; to
+ * index binary terms, use {@link BytesTermAttribute} and its implementation.
  */
 public interface TermToBytesRefAttribute extends Attribute {
 
-  /**
-   * Updates the bytes {@link #getBytesRef()} to contain this term's
-   * final encoding.
-   */
-  public void fillBytesRef();
-
   /**
-   * Retrieve this attribute's BytesRef. The bytes are updated
-   * from the current term when the consumer calls {@link #fillBytesRef()}.
-   * @return this Attributes internal BytesRef.
+   * Retrieve this attribute's BytesRef. The bytes are updated from the current term.
+   * The implementation may return a new instance or keep the previous one.
+   * @return a BytesRef to be indexed (only stays valid until token stream gets incremented)
    */
   public BytesRef getBytesRef();
 }
@@ -23,17 +23,14 @@ import java.io.Reader;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.NumericTokenStream;
 import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.BytesTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
 import org.apache.lucene.document.FieldType.NumericType;
-import org.apache.lucene.index.FieldInvertState; // javadocs
 import org.apache.lucene.index.IndexOptions;
-import org.apache.lucene.index.IndexWriter; // javadocs
 import org.apache.lucene.index.IndexableField;
 import org.apache.lucene.index.IndexableFieldType;
 import org.apache.lucene.index.StorableField;
-import org.apache.lucene.util.AttributeImpl;
 import org.apache.lucene.util.BytesRef;
 
 /**
@@ -570,71 +567,47 @@ public class Field implements IndexableField, StorableField {
   }
 
   private static final class BinaryTokenStream extends TokenStream {
-    private final ByteTermAttribute bytesAtt = addAttribute(ByteTermAttribute.class);
-
-    // Do not init this to true, becase caller must first call reset:
-    private boolean available;
+    private final BytesTermAttribute bytesAtt = addAttribute(BytesTermAttribute.class);
+    private boolean used = true;
+    private BytesRef value;
 
-    public BinaryTokenStream() {
+    /** Creates a new TokenStream that returns a BytesRef as single token.
+     * <p>Warning: Does not initialize the value, you must call
+     * {@link #setValue(BytesRef)} afterwards!
+     */
+    BinaryTokenStream() {
     }
 
     public void setValue(BytesRef value) {
-      bytesAtt.setBytesRef(value);
+      this.value = value;
     }
 
     @Override
     public boolean incrementToken() {
-      if (available) {
-        clearAttributes();
-        available = false;
-        return true;
+      if (used) {
+        return false;
       }
-      return false;
+      clearAttributes();
+      bytesAtt.setBytesRef(value);
+      used = true;
+      return true;
     }
 
     @Override
     public void reset() {
-      available = true;
+      used = false;
     }
 
-    public interface ByteTermAttribute extends TermToBytesRefAttribute {
-      public void setBytesRef(BytesRef bytes);
-    }
-
-    public static class ByteTermAttributeImpl extends AttributeImpl implements ByteTermAttribute, TermToBytesRefAttribute {
-      private BytesRef bytes;
-
-      @Override
-      public void fillBytesRef() {
-        // no-op: the bytes was already filled by our owner's incrementToken
-      }
-
-      @Override
-      public BytesRef getBytesRef() {
-        return bytes;
-      }
-
-      @Override
-      public void setBytesRef(BytesRef bytes) {
-        this.bytes = bytes;
-      }
-
-      @Override
-      public void clear() {
-      }
-
-      @Override
-      public void copyTo(AttributeImpl target) {
-        ByteTermAttributeImpl other = (ByteTermAttributeImpl) target;
-        other.bytes = bytes;
-      }
-    }
+    @Override
+    public void close() {
+      value = null;
+    }
   }
 
   private static final class StringTokenStream extends TokenStream {
     private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class);
     private final OffsetAttribute offsetAttribute = addAttribute(OffsetAttribute.class);
-    private boolean used = false;
+    private boolean used = true;
    private String value = null;
 
    /** Creates a new TokenStream that returns a String as single token.
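Behaviorally, the rewrite above delays the token: used starts as true, so a stream that was never reset() emits nothing rather than a stale token, and the attribute is populated lazily inside incrementToken(). A stand-alone sketch of the same lifecycle, assuming the BytesTermAttribute added by this commit; Field's real BinaryTokenStream is private, so the class here is illustrative:

```java
import java.io.IOException;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.BytesTermAttribute;
import org.apache.lucene.util.BytesRef;

public class SingleTokenLifecycle {
  // 'used' starts true: an un-reset stream cannot leak a stale token.
  static final class OneTokenStream extends TokenStream {
    private final BytesTermAttribute bytesAtt = addAttribute(BytesTermAttribute.class);
    private BytesRef value;
    private boolean used = true;

    void setValue(BytesRef value) { this.value = value; }

    @Override
    public boolean incrementToken() {
      if (used) return false;
      clearAttributes();
      bytesAtt.setBytesRef(value);  // populated lazily, per token
      used = true;
      return true;
    }

    @Override
    public void reset() { used = false; }
  }

  public static void main(String[] args) throws IOException {
    OneTokenStream ts = new OneTokenStream();
    ts.setValue(new BytesRef(new byte[] { 0x12, 0x34 }));
    ts.reset();                                // arms the single token
    System.out.println(ts.incrementToken());   // true
    System.out.println(ts.incrementToken());   // false: already consumed
    ts.end();
    ts.close();
  }
}
```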
@@ -36,7 +36,6 @@ abstract class TermsHashPerField implements Comparable<TermsHashPerField> {
   protected final DocumentsWriterPerThread.DocState docState;
   protected final FieldInvertState fieldState;
   TermToBytesRefAttribute termAtt;
-  BytesRef termBytesRef;
 
   // Copied from our perThread
   final IntBlockPool intPool;
@@ -145,13 +144,10 @@ abstract class TermsHashPerField implements Comparable<TermsHashPerField> {
    * entry point (for first TermsHash); postings use this
    * API. */
   void add() throws IOException {
-
-    termAtt.fillBytesRef();
-
     // We are first in the chain so we must "intern" the
     // term text into textStart address
     // Get the text & hash of this term.
-    int termID = bytesHash.add(termBytesRef);
+    int termID = bytesHash.add(termAtt.getBytesRef());
 
     //System.out.println("add term=" + termBytesRef.utf8ToString() + " doc=" + docState.docID + " termID=" + termID);
 
@@ -292,10 +288,6 @@ abstract class TermsHashPerField implements Comparable<TermsHashPerField> {
    * document. */
   boolean start(IndexableField field, boolean first) {
     termAtt = fieldState.termAttribute;
-    // EmptyTokenStream can have null term att
-    if (termAtt != null) {
-      termBytesRef = termAtt.getBytesRef();
-    }
     if (nextPerField != null) {
       doNextCall = nextPerField.start(field, first);
     }
@@ -267,15 +267,13 @@ public class QueryBuilder {
    */
   private Query analyzeTerm(String field, TokenStream stream) throws IOException {
     TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
-    BytesRef bytes = termAtt.getBytesRef();
 
     stream.reset();
     if (!stream.incrementToken()) {
       throw new AssertionError();
     }
 
-    termAtt.fillBytesRef();
-    return newTermQuery(new Term(field, BytesRef.deepCopyOf(bytes)));
+    return newTermQuery(new Term(field, BytesRef.deepCopyOf(termAtt.getBytesRef())));
   }
 
   /**
@@ -286,12 +284,10 @@ public class QueryBuilder {
     q.setDisableCoord(true);
 
     TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
-    BytesRef bytes = termAtt.getBytesRef();
 
     stream.reset();
     while (stream.incrementToken()) {
-      termAtt.fillBytesRef();
-      Query currentQuery = newTermQuery(new Term(field, BytesRef.deepCopyOf(bytes)));
+      Query currentQuery = newTermQuery(new Term(field, BytesRef.deepCopyOf(termAtt.getBytesRef())));
       q.add(currentQuery, BooleanClause.Occur.SHOULD);
     }
 
@@ -317,18 +313,15 @@ public class QueryBuilder {
     BooleanQuery.Builder currentQuery = newBooleanQuery(true);
 
     TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
-    BytesRef bytes = termAtt.getBytesRef();
-
     PositionIncrementAttribute posIncrAtt = stream.getAttribute(PositionIncrementAttribute.class);
 
     stream.reset();
     while (stream.incrementToken()) {
-      termAtt.fillBytesRef();
       if (posIncrAtt.getPositionIncrement() != 0) {
         add(q, currentQuery.build(), operator);
         currentQuery = newBooleanQuery(true);
       }
-      currentQuery.add(newTermQuery(new Term(field, BytesRef.deepCopyOf(bytes))), BooleanClause.Occur.SHOULD);
+      currentQuery.add(newTermQuery(new Term(field, BytesRef.deepCopyOf(termAtt.getBytesRef()))), BooleanClause.Occur.SHOULD);
     }
     add(q, currentQuery.build(), operator);
 
@@ -343,21 +336,17 @@ public class QueryBuilder {
     builder.setSlop(slop);
 
     TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
-    BytesRef bytes = termAtt.getBytesRef();
-
     PositionIncrementAttribute posIncrAtt = stream.getAttribute(PositionIncrementAttribute.class);
     int position = -1;
 
     stream.reset();
     while (stream.incrementToken()) {
-      termAtt.fillBytesRef();
-
       if (enablePositionIncrements) {
         position += posIncrAtt.getPositionIncrement();
       } else {
         position += 1;
       }
-      builder.add(new Term(field, bytes), position);
+      builder.add(new Term(field, termAtt.getBytesRef()), position);
     }
 
     return builder.build();
@@ -371,7 +360,6 @@ public class QueryBuilder {
     mpq.setSlop(slop);
 
     TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
-    BytesRef bytes = termAtt.getBytesRef();
 
     PositionIncrementAttribute posIncrAtt = stream.getAttribute(PositionIncrementAttribute.class);
     int position = -1;
@@ -379,7 +367,6 @@ public class QueryBuilder {
     List<Term> multiTerms = new ArrayList<>();
     stream.reset();
     while (stream.incrementToken()) {
-      termAtt.fillBytesRef();
       int positionIncrement = posIncrAtt.getPositionIncrement();
 
       if (positionIncrement > 0 && multiTerms.size() > 0) {
@@ -391,7 +378,7 @@ public class QueryBuilder {
         multiTerms.clear();
       }
       position += positionIncrement;
-      multiTerms.add(new Term(field, BytesRef.deepCopyOf(bytes)));
+      multiTerms.add(new Term(field, BytesRef.deepCopyOf(termAtt.getBytesRef())));
     }
 
     if (enablePositionIncrements) {
@@ -17,7 +17,6 @@ package org.apache.lucene.analysis;
  * limitations under the License.
  */
 
-import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.NumericUtils;
 import org.apache.lucene.analysis.NumericTokenStream.NumericTermAttributeImpl;
 import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
@@ -40,14 +39,12 @@ public class TestNumericTokenStream extends BaseTokenStreamTestCase {
     assertNotNull(typeAtt);
     final NumericTokenStream.NumericTermAttribute numericAtt = stream.getAttribute(NumericTokenStream.NumericTermAttribute.class);
     assertNotNull(numericAtt);
-    final BytesRef bytes = bytesAtt.getBytesRef();
     stream.reset();
     assertEquals(64, numericAtt.getValueSize());
     for (int shift=0; shift<64; shift+=NumericUtils.PRECISION_STEP_DEFAULT) {
       assertTrue("New token is available", stream.incrementToken());
       assertEquals("Shift value wrong", shift, numericAtt.getShift());
-      bytesAtt.fillBytesRef();
-      assertEquals("Term is incorrectly encoded", lvalue & ~((1L << shift) - 1L), NumericUtils.prefixCodedToLong(bytes));
+      assertEquals("Term is incorrectly encoded", lvalue & ~((1L << shift) - 1L), NumericUtils.prefixCodedToLong(bytesAtt.getBytesRef()));
       assertEquals("Term raw value is incorrectly encoded", lvalue & ~((1L << shift) - 1L), numericAtt.getRawValue());
       assertEquals("Type incorrect", (shift == 0) ? NumericTokenStream.TOKEN_TYPE_FULL_PREC : NumericTokenStream.TOKEN_TYPE_LOWER_PREC, typeAtt.type());
     }
@@ -65,14 +62,12 @@ public class TestNumericTokenStream extends BaseTokenStreamTestCase {
     assertNotNull(typeAtt);
     final NumericTokenStream.NumericTermAttribute numericAtt = stream.getAttribute(NumericTokenStream.NumericTermAttribute.class);
     assertNotNull(numericAtt);
-    final BytesRef bytes = bytesAtt.getBytesRef();
     stream.reset();
     assertEquals(32, numericAtt.getValueSize());
     for (int shift=0; shift<32; shift+=NumericUtils.PRECISION_STEP_DEFAULT) {
       assertTrue("New token is available", stream.incrementToken());
       assertEquals("Shift value wrong", shift, numericAtt.getShift());
-      bytesAtt.fillBytesRef();
-      assertEquals("Term is incorrectly encoded", ivalue & ~((1 << shift) - 1), NumericUtils.prefixCodedToInt(bytes));
+      assertEquals("Term is incorrectly encoded", ivalue & ~((1 << shift) - 1), NumericUtils.prefixCodedToInt(bytesAtt.getBytesRef()));
       assertEquals("Term raw value is incorrectly encoded", ((long) ivalue) & ~((1L << shift) - 1L), numericAtt.getRawValue());
       assertEquals("Type incorrect", (shift == 0) ? NumericTokenStream.TOKEN_TYPE_FULL_PREC : NumericTokenStream.TOKEN_TYPE_LOWER_PREC, typeAtt.type());
     }
@@ -123,6 +118,7 @@ public class TestNumericTokenStream extends BaseTokenStreamTestCase {
 
   public void testAttributeClone() throws Exception {
     NumericTermAttributeImpl att = new NumericTermAttributeImpl();
+    att.init(1234L, 64, 8, 0); // set some value, to make getBytesRef() work
     NumericTermAttributeImpl copy = TestCharTermAttributeImpl.assertCloneIsEqual(att);
     assertNotSame(att.getBytesRef(), copy.getBytesRef());
     NumericTermAttributeImpl copy2 = TestCharTermAttributeImpl.assertCopyIsEqual(att);
@@ -1,86 +0,0 @@
-package org.apache.lucene.index;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
-import org.apache.lucene.util.AttributeImpl;
-import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.analysis.CannedBinaryTokenStream; // javadocs
-
-/**
- * A binary tokenstream that lets you index a single
- * binary token (BytesRef value).
- *
- * @see CannedBinaryTokenStream
- */
-public final class BinaryTokenStream extends TokenStream {
-  private final ByteTermAttribute bytesAtt = addAttribute(ByteTermAttribute.class);
-  private boolean available = true;
-
-  public BinaryTokenStream(BytesRef bytes) {
-    bytesAtt.setBytesRef(bytes);
-  }
-
-  @Override
-  public boolean incrementToken() {
-    if (available) {
-      clearAttributes();
-      available = false;
-      return true;
-    }
-    return false;
-  }
-
-  @Override
-  public void reset() {
-    available = true;
-  }
-
-  public interface ByteTermAttribute extends TermToBytesRefAttribute {
-    public void setBytesRef(BytesRef bytes);
-  }
-
-  public static class ByteTermAttributeImpl extends AttributeImpl implements ByteTermAttribute,TermToBytesRefAttribute {
-    private BytesRef bytes;
-
-    @Override
-    public void fillBytesRef() {
-      // no-op: the bytes was already filled by our owner's incrementToken
-    }
-
-    @Override
-    public BytesRef getBytesRef() {
-      return bytes;
-    }
-
-    @Override
-    public void setBytesRef(BytesRef bytes) {
-      this.bytes = bytes;
-    }
-
-    @Override
-    public void clear() {}
-
-    @Override
-    public void copyTo(AttributeImpl target) {
-      ByteTermAttributeImpl other = (ByteTermAttributeImpl) target;
-      other.bytes = bytes;
-    }
-  }
-}
@@ -108,11 +108,6 @@ public class Test2BTerms extends LuceneTestCase {
   }
 
   private final static class MyTermAttributeImpl extends AttributeImpl implements TermToBytesRefAttribute {
-    @Override
-    public void fillBytesRef() {
-      // no-op: the bytes was already filled by our owner's incrementToken
-    }
-
     @Override
     public BytesRef getBytesRef() {
       return bytes;
@@ -122,18 +117,9 @@ public class Test2BTerms extends LuceneTestCase {
     public void clear() {
     }
 
-    @Override
-    public boolean equals(Object other) {
-      return other == this;
-    }
-
-    @Override
-    public int hashCode() {
-      return System.identityHashCode(this);
-    }
-
     @Override
     public void copyTo(AttributeImpl target) {
       throw new UnsupportedOperationException();
     }
 
     @Override
@@ -22,7 +22,6 @@ import java.io.IOException;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
-import org.apache.lucene.document.TextField;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.TopDocs;
@@ -38,7 +37,6 @@ public class TestBinaryTerms extends LuceneTestCase {
     Directory dir = newDirectory();
     RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
     BytesRef bytes = new BytesRef(2);
-    BinaryTokenStream tokenStream = new BinaryTokenStream(bytes);
 
     for (int i = 0; i < 256; i++) {
       bytes.bytes[0] = (byte) i;
@@ -47,8 +45,8 @@ public class TestBinaryTerms extends LuceneTestCase {
       Document doc = new Document();
       FieldType customType = new FieldType();
       customType.setStored(true);
-      doc.add(new Field("id", "" + i, customType));
-      doc.add(new TextField("bytes", tokenStream));
+      doc.add(newField("id", "" + i, customType));
+      doc.add(newStringField("bytes", bytes, Field.Store.NO));
       iw.addDocument(doc);
     }
 
@@ -49,14 +49,13 @@ public class TestLongPostings extends LuceneTestCase {
     }
     try (TokenStream ts = a.tokenStream("foo", s)) {
       final TermToBytesRefAttribute termAtt = ts.getAttribute(TermToBytesRefAttribute.class);
-      final BytesRef termBytes = termAtt.getBytesRef();
       ts.reset();
 
       int count = 0;
       boolean changed = false;
 
       while(ts.incrementToken()) {
-        termAtt.fillBytesRef();
+        final BytesRef termBytes = termAtt.getBytesRef();
         if (count == 0 && !termBytes.utf8ToString().equals(s)) {
           // The value was changed during analysis.  Keep iterating so the
           // tokenStream is exhausted.
@@ -23,17 +23,12 @@ import java.util.HashSet;
 import java.util.List;
 import java.util.Set;
 
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
-import org.apache.lucene.document.FieldType;
-import org.apache.lucene.document.StringField;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.store.Directory;
-import org.apache.lucene.util.AttributeImpl;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.StringHelper;
@@ -94,78 +89,6 @@ public class TestPrefixQuery extends LuceneTestCase {
     directory.close();
   }
 
-  static final class BinaryTokenStream extends TokenStream {
-    private final ByteTermAttribute bytesAtt = addAttribute(ByteTermAttribute.class);
-    private boolean available = true;
-
-    public BinaryTokenStream(BytesRef bytes) {
-      bytesAtt.setBytesRef(bytes);
-    }
-
-    @Override
-    public boolean incrementToken() {
-      if (available) {
-        clearAttributes();
-        available = false;
-        return true;
-      }
-      return false;
-    }
-
-    @Override
-    public void reset() {
-      available = true;
-    }
-
-    public interface ByteTermAttribute extends TermToBytesRefAttribute {
-      public void setBytesRef(BytesRef bytes);
-    }
-
-    public static class ByteTermAttributeImpl extends AttributeImpl implements ByteTermAttribute,TermToBytesRefAttribute {
-      private BytesRef bytes;
-
-      @Override
-      public void fillBytesRef() {
-        // no-op: the bytes was already filled by our owner's incrementToken
-      }
-
-      @Override
-      public BytesRef getBytesRef() {
-        return bytes;
-      }
-
-      @Override
-      public void setBytesRef(BytesRef bytes) {
-        this.bytes = bytes;
-      }
-
-      @Override
-      public void clear() {}
-
-      @Override
-      public void copyTo(AttributeImpl target) {
-        ByteTermAttributeImpl other = (ByteTermAttributeImpl) target;
-        other.bytes = bytes;
-      }
-    }
-  }
-
-  /** Basically a StringField that accepts binary term. */
-  private static class BinaryField extends Field {
-
-    final static FieldType TYPE;
-    static {
-      TYPE = new FieldType(StringField.TYPE_NOT_STORED);
-      // Necessary so our custom tokenStream is used by Field.tokenStream:
-      TYPE.setTokenized(true);
-      TYPE.freeze();
-    }
-
-    public BinaryField(String name, BytesRef value) {
-      super(name, new BinaryTokenStream(value), TYPE);
-    }
-  }
-
   public void testRandomBinaryPrefix() throws Exception {
     Directory dir = newDirectory();
     RandomIndexWriter w = new RandomIndexWriter(random(), dir);
@@ -182,7 +105,7 @@ public class TestPrefixQuery extends LuceneTestCase {
     Collections.shuffle(termsList, random());
     for(BytesRef term : termsList) {
       Document doc = new Document();
-      doc.add(new BinaryField("field", term));
+      doc.add(newStringField("field", term, Field.Store.NO));
       w.addDocument(doc);
     }
 
@@ -167,14 +167,11 @@ public abstract class AbstractTestCase extends LuceneTestCase {
 
     try (TokenStream tokenStream = analyzer.tokenStream(field, text)) {
       TermToBytesRefAttribute termAttribute = tokenStream.getAttribute(TermToBytesRefAttribute.class);
-
-      BytesRef bytesRef = termAttribute.getBytesRef();
 
       tokenStream.reset();
 
       while (tokenStream.incrementToken()) {
-        termAttribute.fillBytesRef();
-        bytesRefs.add(BytesRef.deepCopyOf(bytesRef));
+        bytesRefs.add(BytesRef.deepCopyOf(termAttribute.getBytesRef()));
       }
 
       tokenStream.end();
@@ -453,18 +453,16 @@ public class MemoryIndex {
       PositionIncrementAttribute posIncrAttribute = stream.addAttribute(PositionIncrementAttribute.class);
       OffsetAttribute offsetAtt = stream.addAttribute(OffsetAttribute.class);
       PayloadAttribute payloadAtt = storePayloads ? stream.addAttribute(PayloadAttribute.class) : null;
-      BytesRef ref = termAtt.getBytesRef();
       stream.reset();
 
       while (stream.incrementToken()) {
-        termAtt.fillBytesRef();
         // if (DEBUG) System.err.println("token='" + term + "'");
         numTokens++;
         final int posIncr = posIncrAttribute.getPositionIncrement();
         if (posIncr == 0)
           numOverlapTokens++;
         pos += posIncr;
-        int ord = terms.add(ref);
+        int ord = terms.add(termAtt.getBytesRef());
         if (ord < 0) {
           ord = (-ord) - 1;
           postingsWriter.reset(sliceArray.end[ord]);
@@ -612,15 +612,14 @@ public abstract class QueryParserBase extends QueryBuilder implements CommonQuer
       source.reset();
 
       TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class);
-      BytesRef bytes = termAtt.getBytesRef();
 
       if (!source.incrementToken())
         throw new IllegalArgumentException("analyzer returned no terms for multiTerm term: " + part);
-      termAtt.fillBytesRef();
+      BytesRef bytes = BytesRef.deepCopyOf(termAtt.getBytesRef());
       if (source.incrementToken())
         throw new IllegalArgumentException("analyzer returned too many terms for multiTerm term: " + part);
       source.end();
-      return BytesRef.deepCopyOf(bytes);
+      return bytes;
     } catch (IOException e) {
       throw new RuntimeException("Error analyzing multiTerm term: " + part, e);
     }
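The reordering above is not cosmetic: the deep copy must be taken before the probe for a second token, because that second incrementToken() may overwrite the attribute's reused BytesRef. The same single-term pattern recurs in CollationField, ICUCollationField and TextField below; a condensed sketch of it, assuming the post-commit API (the method and message texts are illustrative):

```java
import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
import org.apache.lucene.util.BytesRef;

public class AnalyzeSingleTerm {
  /** Analyzes {@code part} and requires exactly one output token. */
  static BytesRef analyzeOneTerm(Analyzer analyzer, String field, String part) throws IOException {
    try (TokenStream source = analyzer.tokenStream(field, part)) {
      source.reset();
      TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class);
      if (!source.incrementToken()) {
        throw new IllegalArgumentException("analyzer returned no terms for: " + part);
      }
      // Copy now: the next incrementToken() may reuse the same BytesRef.
      BytesRef bytes = BytesRef.deepCopyOf(termAtt.getBytesRef());
      if (source.incrementToken()) {
        throw new IllegalArgumentException("analyzer returned too many terms for: " + part);
      }
      source.end();
      return bytes;
    }
  }
}
```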
@@ -53,11 +53,9 @@ public class SpanOrTermsBuilder extends SpanBuilderBase {
 
     try (TokenStream ts = analyzer.tokenStream(fieldName, value)) {
       TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
-      BytesRef bytes = termAtt.getBytesRef();
       ts.reset();
       while (ts.incrementToken()) {
-        termAtt.fillBytesRef();
-        SpanTermQuery stq = new SpanTermQuery(new Term(fieldName, BytesRef.deepCopyOf(bytes)));
+        SpanTermQuery stq = new SpanTermQuery(new Term(fieldName, BytesRef.deepCopyOf(termAtt.getBytesRef())));
         clausesList.add(stq);
       }
       ts.end();
@@ -55,11 +55,9 @@ public class TermsQueryBuilder implements QueryBuilder {
     try (TokenStream ts = analyzer.tokenStream(fieldName, text)) {
       TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
       Term term = null;
-      BytesRef bytes = termAtt.getBytesRef();
       ts.reset();
       while (ts.incrementToken()) {
-        termAtt.fillBytesRef();
-        term = new Term(fieldName, BytesRef.deepCopyOf(bytes));
+        term = new Term(fieldName, BytesRef.deepCopyOf(termAtt.getBytesRef()));
         bq.add(new BooleanClause(new TermQuery(term), BooleanClause.Occur.SHOULD));
       }
       ts.end();
@@ -61,8 +61,6 @@ public class TokenStreamToTermAutomatonQuery {
     final PositionLengthAttribute posLengthAtt = in.addAttribute(PositionLengthAttribute.class);
     final OffsetAttribute offsetAtt = in.addAttribute(OffsetAttribute.class);
 
-    final BytesRef term = termBytesAtt.getBytesRef();
-
     in.reset();
 
     TermAutomatonQuery query = new TermAutomatonQuery(field);
@@ -93,7 +91,7 @@ public class TokenStreamToTermAutomatonQuery {
         state = query.createState();
       }
 
-      termBytesAtt.fillBytesRef();
+      BytesRef term = termBytesAtt.getBytesRef();
       //System.out.println(pos + "-" + endPos + ": " + term.utf8ToString() + ": posInc=" + posInc);
       if (term.length == 1 && term.bytes[term.offset] == (byte) '*') {
         query.addAnyTransition(pos, endPos);
@@ -20,11 +20,7 @@ package org.apache.lucene.spatial.prefix;
 import java.io.IOException;
 
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
-import org.apache.lucene.util.Attribute;
-import org.apache.lucene.util.AttributeFactory;
-import org.apache.lucene.util.AttributeImpl;
+import org.apache.lucene.analysis.tokenattributes.BytesTermAttribute;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefIterator;
 
@@ -37,85 +33,6 @@ import org.apache.lucene.util.BytesRefIterator;
  */
 class BytesRefIteratorTokenStream extends TokenStream {
 
-  // just a wrapper to prevent adding CharTermAttribute
-  private static final class BRAttributeFactory extends AttributeFactory {
-    private final AttributeFactory delegate;
-
-    BRAttributeFactory(AttributeFactory delegate) {
-      this.delegate = delegate;
-    }
-
-    @Override
-    public AttributeImpl createAttributeInstance(Class<? extends Attribute> attClass) {
-      if (CharTermAttribute.class.isAssignableFrom(attClass))
-        throw new IllegalArgumentException(getClass() + " does not support CharTermAttribute.");
-      return delegate.createAttributeInstance(attClass);
-    }
-  }
-
-  private static final class BRTermToBytesRefAttributeImpl extends AttributeImpl
-      implements TermToBytesRefAttribute {
-    private final BytesRef bytes = new BytesRef();
-
-    void setBytesRef(BytesRef inputBytes) {
-      // shallow clone. this.bytesRef is final
-      bytes.bytes = inputBytes.bytes;
-      bytes.offset = inputBytes.offset;
-      bytes.length = inputBytes.length;
-    }
-
-    @Override
-    public void clear() {
-      // we keep it untouched as it's fully controlled by the outer class.
-    }
-
-    @Override
-    public void copyTo(AttributeImpl target) {
-      final BRTermToBytesRefAttributeImpl a = (BRTermToBytesRefAttributeImpl) target;
-      a.setBytesRef(BytesRef.deepCopyOf(bytes));
-    }
-
-    @Override
-    public void fillBytesRef() {
-      //nothing to do; it's populated by incrementToken
-    }
-
-    @Override
-    public BytesRef getBytesRef() {
-      return bytes;
-    }
-
-    @Override
-    public BRTermToBytesRefAttributeImpl clone() {
-      // super.clone won't work since we need a new BytesRef reference and it's nice to have it final. The superclass
-      // has no state to copy anyway.
-      final BRTermToBytesRefAttributeImpl clone = new BRTermToBytesRefAttributeImpl();
-      clone.setBytesRef(BytesRef.deepCopyOf(bytes));
-      return clone;
-    }
-
-    @Override
-    public int hashCode() {
-      return bytes.hashCode();
-    }
-
-    @Override
-    public boolean equals(Object obj) {
-      if (this == obj) return true;
-      if (obj == null) return false;
-      if (getClass() != obj.getClass()) return false;
-      BRTermToBytesRefAttributeImpl other = (BRTermToBytesRefAttributeImpl) obj;
-      if (!bytes.equals(other.bytes)) return false;
-      return true;
-    }
-  }
-
-  public BytesRefIteratorTokenStream() {
-    super(new BRAttributeFactory(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY));
-    addAttributeImpl(new BRTermToBytesRefAttributeImpl());//because non-public constructor
-    bytesAtt = (BRTermToBytesRefAttributeImpl) addAttribute(TermToBytesRefAttribute.class);
-  }
-
   public BytesRefIterator getBytesRefIterator() {
     return bytesIter;
   }
@@ -129,7 +46,6 @@ class BytesRefIteratorTokenStream extends TokenStream {
   public void reset() throws IOException {
     if (bytesIter == null)
       throw new IllegalStateException("call setBytesRefIterator() before usage");
-    bytesAtt.getBytesRef().length = 0;
   }
 
   @Override
@@ -137,14 +53,12 @@ class BytesRefIteratorTokenStream extends TokenStream {
     if (bytesIter == null)
       throw new IllegalStateException("call setBytesRefIterator() before usage");
 
-    // this will only clear all other attributes in this TokenStream
-    clearAttributes();//TODO but there should be no "other" attributes
-
     // get next
     BytesRef bytes = bytesIter.next();
     if (bytes == null) {
       return false;
     } else {
+      clearAttributes();
       bytesAtt.setBytesRef(bytes);
       //note: we don't bother setting posInc or type attributes. There's no point to it.
       return true;
@@ -152,7 +66,7 @@ class BytesRefIteratorTokenStream extends TokenStream {
   }
 
   //members
-  private final BRTermToBytesRefAttributeImpl bytesAtt;
+  private final BytesTermAttribute bytesAtt = addAttribute(BytesTermAttribute.class);
 
   private BytesRefIterator bytesIter = null; // null means not initialized
 
@@ -486,11 +486,10 @@ public class FreeTextSuggester extends Lookup implements Accountable {
 
       // Run full analysis, but save only the
       // last 1gram, last 2gram, etc.:
-      BytesRef tokenBytes = termBytesAtt.getBytesRef();
       int maxEndOffset = -1;
       boolean sawRealToken = false;
       while(ts.incrementToken()) {
-        termBytesAtt.fillBytesRef();
+        BytesRef tokenBytes = termBytesAtt.getBytesRef();
         sawRealToken |= tokenBytes.length > 0;
         // TODO: this is somewhat iffy; today, ShingleFilter
         // sets posLen to the gram count; maybe we should make
@@ -58,7 +58,7 @@ public final class CompletionTokenStream extends TokenStream {
 
   private final PayloadAttribute payloadAttr = addAttribute(PayloadAttribute.class);
   private final PositionIncrementAttribute posAttr = addAttribute(PositionIncrementAttribute.class);
-  private final ByteTermAttribute bytesAtt = addAttribute(ByteTermAttribute.class);
+  private final BytesRefBuilderTermAttribute bytesAtt = addAttribute(BytesRefBuilderTermAttribute.class);
 
   private final TokenStream input;
   final boolean preserveSep;
@@ -309,9 +309,7 @@ public final class CompletionTokenStream extends TokenStream {
   /**
    * Attribute providing access to the term builder and UTF-16 conversion
    */
-  private interface ByteTermAttribute extends TermToBytesRefAttribute {
-    // marker interface
-
+  private interface BytesRefBuilderTermAttribute extends TermToBytesRefAttribute {
     /**
      * Returns the builder from which the term is derived.
      */
@@ -326,20 +324,15 @@ public final class CompletionTokenStream extends TokenStream {
   /**
    * Custom attribute implementation for completion token stream
    */
-  public static final class ByteTermAttributeImpl extends AttributeImpl implements ByteTermAttribute, TermToBytesRefAttribute {
+  public static final class BytesRefBuilderTermAttributeImpl extends AttributeImpl implements BytesRefBuilderTermAttribute, TermToBytesRefAttribute {
     private final BytesRefBuilder bytes = new BytesRefBuilder();
-    private CharsRefBuilder charsRef;
+    private transient CharsRefBuilder charsRef;
 
     /**
-     * Sole constructor
+     * no-op
     */
-    public ByteTermAttributeImpl() {
-    }
-
-    @Override
-    public void fillBytesRef() {
-      // does nothing - we change in place
+    public BytesRefBuilderTermAttributeImpl() {
     }
 
     @Override
@@ -359,10 +352,17 @@ public final class CompletionTokenStream extends TokenStream {
 
     @Override
     public void copyTo(AttributeImpl target) {
-      ByteTermAttributeImpl other = (ByteTermAttributeImpl) target;
+      BytesRefBuilderTermAttributeImpl other = (BytesRefBuilderTermAttributeImpl) target;
       other.bytes.copyBytes(bytes);
     }
 
+    @Override
+    public AttributeImpl clone() {
+      BytesRefBuilderTermAttributeImpl other = new BytesRefBuilderTermAttributeImpl();
+      copyTo(other);
+      return other;
+    }
+
     @Override
     public CharSequence toUTF16() {
       if (charsRef == null) {
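The added clone() funnels the deep copy through copyTo(...) rather than super.clone(), since the final BytesRefBuilder field cannot be reassigned on a shallow clone; BytesTermAttributeImpl earlier in this commit uses the same copyTo-based idiom. A generic sketch of the pattern against Lucene's AttributeImpl (the payload field is a made-up example):

```java
import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.BytesRefBuilder;

// Keeps the deep-copy logic in one place: clone() reuses copyTo().
public class ExampleAttributeImpl extends AttributeImpl {
  private final BytesRefBuilder payload = new BytesRefBuilder();

  @Override
  public void clear() {
    payload.clear();
  }

  @Override
  public void copyTo(AttributeImpl target) {
    ((ExampleAttributeImpl) target).payload.copyBytes(payload.get());
  }

  @Override
  public AttributeImpl clone() {
    ExampleAttributeImpl copy = new ExampleAttributeImpl();
    copyTo(copy); // single source of truth for the deep copy
    return copy;
  }
}
```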
@@ -51,7 +51,6 @@ import org.apache.lucene.document.Document;
 import org.apache.lucene.search.suggest.Lookup.LookupResult;
 import org.apache.lucene.search.suggest.Input;
 import org.apache.lucene.search.suggest.InputArrayIterator;
-import org.apache.lucene.util.AttributeFactory;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.LineFileDocs;
@@ -17,13 +17,11 @@ package org.apache.lucene.analysis;
  * limitations under the License.
  */
 
+import org.apache.lucene.analysis.tokenattributes.BytesTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
-import org.apache.lucene.util.AttributeImpl;
 import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.BytesRefBuilder;
 
 /**
  * TokenStream from a canned list of binary (BytesRef-based)
@@ -54,65 +52,11 @@ public final class CannedBinaryTokenStream extends TokenStream {
 
   private final BinaryToken[] tokens;
   private int upto = 0;
-  private final BinaryTermAttribute termAtt = addAttribute(BinaryTermAttribute.class);
+  private final BytesTermAttribute termAtt = addAttribute(BytesTermAttribute.class);
   private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
   private final PositionLengthAttribute posLengthAtt = addAttribute(PositionLengthAttribute.class);
   private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
-
-  /** An attribute extending {@link
-   *  TermToBytesRefAttribute} but exposing {@link
-   *  #setBytesRef} method. */
-  public interface BinaryTermAttribute extends TermToBytesRefAttribute {
-
-    /** Set the current binary value. */
-    public void setBytesRef(BytesRef bytes);
-  }
-
-  /** Implementation for {@link BinaryTermAttribute}. */
-  public final static class BinaryTermAttributeImpl extends AttributeImpl implements BinaryTermAttribute, TermToBytesRefAttribute {
-    private final BytesRefBuilder bytes = new BytesRefBuilder();
-
-    @Override
-    public void fillBytesRef() {
-      bytes.get(); // sets the length on the bytesref
-    }
-
-    @Override
-    public BytesRef getBytesRef() {
-      return bytes.get();
-    }
-
-    @Override
-    public void setBytesRef(BytesRef bytes) {
-      this.bytes.copyBytes(bytes);
-    }
-
-    @Override
-    public void clear() {
-    }
-
-    @Override
-    public boolean equals(Object other) {
-      return other == this;
-    }
-
-    @Override
-    public int hashCode() {
-      return System.identityHashCode(this);
-    }
-
-    @Override
-    public void copyTo(AttributeImpl target) {
-      BinaryTermAttributeImpl other = (BinaryTermAttributeImpl) target;
-      other.bytes.copyBytes(bytes);
-    }
-
-    @Override
-    public BinaryTermAttributeImpl clone() {
-      throw new UnsupportedOperationException();
-    }
-  }
-
+
   public CannedBinaryTokenStream(BinaryToken... tokens) {
     super();
     this.tokens = tokens;
@@ -183,12 +183,10 @@ public abstract class CollationTestBase extends LuceneTestCase {
       String term = TestUtil.randomSimpleString(random());
       try (TokenStream ts = analyzer.tokenStream("fake", term)) {
         TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
-        BytesRef bytes = termAtt.getBytesRef();
         ts.reset();
         assertTrue(ts.incrementToken());
-        termAtt.fillBytesRef();
         // ensure we make a copy of the actual bytes too
-        map.put(term, BytesRef.deepCopyOf(bytes));
+        map.put(term, BytesRef.deepCopyOf(termAtt.getBytesRef()));
         assertFalse(ts.incrementToken());
         ts.end();
       }
@@ -205,11 +203,9 @@ public abstract class CollationTestBase extends LuceneTestCase {
       BytesRef expected = mapping.getValue();
       try (TokenStream ts = analyzer.tokenStream("fake", term)) {
         TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
-        BytesRef bytes = termAtt.getBytesRef();
         ts.reset();
         assertTrue(ts.incrementToken());
-        termAtt.fillBytesRef();
-        assertEquals(expected, bytes);
+        assertEquals(expected, termAtt.getBytesRef());
         assertFalse(ts.incrementToken());
         ts.end();
       }
@@ -34,11 +34,11 @@ public class MockUTF16TermAttributeImpl extends CharTermAttributeImpl {
     AttributeFactory.getStaticImplementation(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, MockUTF16TermAttributeImpl.class);
 
   @Override
-  public void fillBytesRef() {
-    BytesRef bytes = getBytesRef();
-    byte[] utf16 = toString().getBytes(StandardCharsets.UTF_16LE);
-    bytes.bytes = utf16;
-    bytes.offset = 0;
-    bytes.length = utf16.length;
+  public BytesRef getBytesRef() {
+    final BytesRef ref = this.builder.get();
+    ref.bytes = toString().getBytes(StandardCharsets.UTF_16LE);
+    ref.offset = 0;
+    ref.length = ref.bytes.length;
+    return ref;
   }
 }
@@ -252,16 +252,16 @@ public class ICUCollationField extends FieldType {
       source.reset();
 
       TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class);
-      BytesRef bytes = termAtt.getBytesRef();
+
 
       // we control the analyzer here: most errors are impossible
       if (!source.incrementToken())
         throw new IllegalArgumentException("analyzer returned no terms for text: " + text);
-      termAtt.fillBytesRef();
+      BytesRef bytes = BytesRef.deepCopyOf(termAtt.getBytesRef());
       assert !source.incrementToken();
 
       source.end();
-      return BytesRef.deepCopyOf(bytes);
+      return bytes;
     } catch (IOException e) {
       throw new RuntimeException("Unable to analyze text: " + text, e);
     }
@@ -148,13 +148,11 @@ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase {
     try (TokenStream tokenStream = analyzer.tokenStream("", query)){
       final Set<BytesRef> tokens = new HashSet<>();
       final TermToBytesRefAttribute bytesAtt = tokenStream.getAttribute(TermToBytesRefAttribute.class);
-      final BytesRef bytes = bytesAtt.getBytesRef();
 
       tokenStream.reset();
 
       while (tokenStream.incrementToken()) {
-        bytesAtt.fillBytesRef();
-        tokens.add(BytesRef.deepCopyOf(bytes));
+        tokens.add(BytesRef.deepCopyOf(bytesAtt.getBytesRef()));
       }
 
       tokenStream.end();
@@ -246,7 +244,6 @@ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase {
     final NamedList<Object> tokenNamedList = new SimpleOrderedMap<>();
     final TermToBytesRefAttribute termAtt = token.getAttribute(TermToBytesRefAttribute.class);
     BytesRef rawBytes = termAtt.getBytesRef();
-    termAtt.fillBytesRef();
     final String text = fieldType.indexedToReadable(rawBytes, new CharsRefBuilder()).toString();
     tokenNamedList.add("text", text);
 
@@ -224,16 +224,14 @@ public class CollationField extends FieldType {
     try (TokenStream source = analyzer.tokenStream(field, text)) {
       source.reset();
       TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class);
-      BytesRef bytes = termAtt.getBytesRef();
-
       // we control the analyzer here: most errors are impossible
       if (!source.incrementToken())
         throw new IllegalArgumentException("analyzer returned no terms for text: " + text);
-      termAtt.fillBytesRef();
+      BytesRef bytes = BytesRef.deepCopyOf(termAtt.getBytesRef());
       assert !source.incrementToken();
 
       source.end();
-      return BytesRef.deepCopyOf(bytes);
+      return bytes;
     } catch (IOException e) {
       throw new RuntimeException("Unable to analyze text: " + text, e);
     }
@@ -146,16 +146,15 @@ public class TextField extends FieldType {
       source.reset();
 
       TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class);
-      BytesRef bytes = termAtt.getBytesRef();
 
       if (!source.incrementToken())
         throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,"analyzer returned no terms for multiTerm term: " + part);
-      termAtt.fillBytesRef();
+      BytesRef bytes = BytesRef.deepCopyOf(termAtt.getBytesRef());
       if (source.incrementToken())
         throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,"analyzer returned too many terms for multiTerm term: " + part);
 
       source.end();
-      return BytesRef.deepCopyOf(bytes);
+      return bytes;
     } catch (IOException e) {
       throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,"error analyzing range part: " + part, e);
     }
@@ -267,10 +267,9 @@ public class CursorMarkTest extends SolrTestCaseJ4 {
       String term = TestUtil.randomRealisticUnicodeString(random());
       try (TokenStream ts = analyzer.tokenStream("fake", term)) {
         TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
-        val = termAtt.getBytesRef();
         ts.reset();
         assertTrue(ts.incrementToken());
-        termAtt.fillBytesRef();
+        val = BytesRef.deepCopyOf(termAtt.getBytesRef());
         assertFalse(ts.incrementToken());
         ts.end();
       }