commit e67bf6b089 (parent e45d28a8d3)
Author: Robert Muir
Date:   2011-03-21 13:52:15 +00:00

LUCENE-2944: fix BytesRef reuse bugs, TermToBytesRefAttribute owns the bytes like other attributes

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1083784 13f79535-47bb-0310-9956-ffa450edef68

18 changed files with 137 additions and 98 deletions
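
For orientation before the per-file hunks: every consumer in this commit migrates the same way, from the old pull-style API, where each call to toBytesRef(BytesRef) filled a caller-allocated BytesRef, to the new owner-style API, where the attribute owns one reused BytesRef that consumers fetch once via getBytesRef() and refresh per token via fillBytesRef(). A minimal sketch of the new consumer pattern follows; it is an illustration only, not code from this commit, and the TermBytesConsumer class, collectTerms method, and its arguments are hypothetical placeholders.

import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
import org.apache.lucene.util.BytesRef;

// Hypothetical helper, not part of the commit: collects a private copy of
// each term's bytes produced by the analysis chain.
public class TermBytesConsumer {
  public static List<BytesRef> collectTerms(Analyzer analyzer, String field, String text) throws IOException {
    List<BytesRef> terms = new ArrayList<BytesRef>();
    TokenStream ts = analyzer.tokenStream(field, new StringReader(text));
    TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
    // New API: fetch the attribute-owned BytesRef once, up front ...
    BytesRef bytes = termAtt.getBytesRef();
    ts.reset();
    while (ts.incrementToken()) {
      // ... then re-encode the current term into it on every token.
      termAtt.fillBytesRef();
      // The attribute reuses these bytes on the next incrementToken(),
      // so copy them if they must outlive this iteration.
      terms.add(new BytesRef(bytes));
    }
    ts.end();
    ts.close();
    return terms;
  }
}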

View File

@@ -353,10 +353,10 @@ public class MemoryIndex {
       TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
       PositionIncrementAttribute posIncrAttribute = stream.addAttribute(PositionIncrementAttribute.class);
       OffsetAttribute offsetAtt = stream.addAttribute(OffsetAttribute.class);
-      BytesRef ref = new BytesRef(10);
+      BytesRef ref = termAtt.getBytesRef();
       stream.reset();
       while (stream.incrementToken()) {
-        termAtt.toBytesRef(ref);
+        termAtt.fillBytesRef();
         if (ref.length == 0) continue; // nothing to do
//        if (DEBUG) System.err.println("token='" + term + "'");
         numTokens++;

View File

@@ -58,11 +58,10 @@ public class SpanOrTermsBuilder extends SpanBuilderBase
 			ArrayList<SpanQuery> clausesList=new ArrayList<SpanQuery>();
 			TokenStream ts=analyzer.tokenStream(fieldName,new StringReader(value));
 			TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
+			BytesRef bytes = termAtt.getBytesRef();
 			while (ts.incrementToken()) {
-				BytesRef term = new BytesRef();
-				termAtt.toBytesRef(term);
-				SpanTermQuery stq=new SpanTermQuery(new Term(fieldName, term));
+				termAtt.fillBytesRef();
+				SpanTermQuery stq=new SpanTermQuery(new Term(fieldName, new BytesRef(bytes)));
 				clausesList.add(stq);
 			}
 			SpanOrQuery soq=new SpanOrQuery(clausesList.toArray(new SpanQuery[clausesList.size()]));

View File

@@ -63,16 +63,16 @@ public class TermsFilterBuilder implements FilterBuilder
 		try
 		{
 			Term term = null;
+			BytesRef bytes = termAtt.getBytesRef();
 			while (ts.incrementToken()) {
-				BytesRef bytes = new BytesRef();
-				termAtt.toBytesRef(bytes);
+				termAtt.fillBytesRef();
 				if (term == null)
 				{
-					term = new Term(fieldName, bytes);
+					term = new Term(fieldName, new BytesRef(bytes));
 				} else
 				{
 					// create from previous to save fieldName.intern overhead
-					term = term.createTerm(bytes);
+					term = term.createTerm(new BytesRef(bytes));
 				}
 				tf.addTerm(term);
 			}

View File

@@ -60,16 +60,16 @@ public class TermsQueryBuilder implements QueryBuilder {
 		{
 			TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
 			Term term = null;
+			BytesRef bytes = termAtt.getBytesRef();
 			while (ts.incrementToken()) {
-				BytesRef bytes = new BytesRef();
-				termAtt.toBytesRef(bytes);
+				termAtt.fillBytesRef();
 				if (term == null)
 				{
-					term = new Term(fieldName, bytes);
+					term = new Term(fieldName, new BytesRef(bytes));
 				} else
 				{
 					// create from previous to save fieldName.intern overhead
-					term = term.createTerm(bytes);
+					term = term.createTerm(new BytesRef(bytes));
 				}
 				bq.add(new BooleanClause(new TermQuery(term),BooleanClause.Occur.SHOULD));
 			}

View File

@@ -142,8 +142,13 @@ public final class NumericTokenStream extends TokenStream {
   public static final class NumericTermAttributeImpl extends AttributeImpl implements NumericTermAttribute,TermToBytesRefAttribute {
     private long value = 0L;
     private int valueSize = 0, shift = 0, precisionStep = 0;
+    private BytesRef bytes = new BytesRef();
+
+    public BytesRef getBytesRef() {
+      return bytes;
+    }
 
-    public int toBytesRef(BytesRef bytes) {
+    public int fillBytesRef() {
       try {
         assert valueSize == 64 || valueSize == 32;
         return (valueSize == 64) ?
@@ -180,8 +185,7 @@ public final class NumericTokenStream extends TokenStream {
     @Override
     public void reflectWith(AttributeReflector reflector) {
-      final BytesRef bytes = new BytesRef();
-      toBytesRef(bytes);
+      fillBytesRef();
       reflector.reflect(TermToBytesRefAttribute.class, "bytes", bytes);
       reflector.reflect(NumericTermAttribute.class, "shift", shift);
       reflector.reflect(NumericTermAttribute.class, "rawValue", getRawValue());

View File

@@ -77,8 +77,16 @@ public class CharTermAttributeImpl extends AttributeImpl implements CharTermAttr
   }
 
   // *** TermToBytesRefAttribute interface ***
-  public int toBytesRef(BytesRef target) {
-    return UnicodeUtil.UTF16toUTF8WithHash(termBuffer, 0, termLength, target);
+  private BytesRef bytes = new BytesRef(MIN_BUFFER_SIZE);
+
+  @Override
+  public int fillBytesRef() {
+    return UnicodeUtil.UTF16toUTF8WithHash(termBuffer, 0, termLength, bytes);
+  }
+
+  @Override
+  public BytesRef getBytesRef() {
+    return bytes;
   }
 
   // *** CharSequence interface ***
@@ -205,6 +213,7 @@ public class CharTermAttributeImpl extends AttributeImpl implements CharTermAttr
       // Do a deep clone
       t.termBuffer = new char[this.termLength];
       System.arraycopy(this.termBuffer, 0, t.termBuffer, 0, this.termLength);
+      t.bytes = new BytesRef(bytes);
       return t;
     }
@@ -246,9 +255,8 @@ public class CharTermAttributeImpl extends AttributeImpl implements CharTermAttr
   @Override
   public void reflectWith(AttributeReflector reflector) {
     reflector.reflect(CharTermAttribute.class, "term", toString());
-    final BytesRef bytes = new BytesRef();
-    toBytesRef(bytes);
-    reflector.reflect(TermToBytesRefAttribute.class, "bytes", bytes);
+    fillBytesRef();
+    reflector.reflect(TermToBytesRefAttribute.class, "bytes", new BytesRef(bytes));
   }
 
   @Override

View File

@@ -22,18 +22,39 @@ import org.apache.lucene.util.BytesRef;
 /**
  * This attribute is requested by TermsHashPerField to index the contents.
- * This attribute has no real state, it should be implemented in addition to
- * {@link CharTermAttribute}, to support indexing the term text as
- * UTF-8 bytes.
+ * This attribute can be used to customize the final byte[] encoding of terms.
+ * <p>
+ * Consumers of this attribute call {@link #getBytesRef()} up-front, and then
+ * invoke {@link #fillBytesRef()} for each term. Example:
+ * <pre class="prettyprint">
+ *   final TermToBytesRefAttribute termAtt = tokenStream.getAttribute(TermToBytesRefAttribute.class);
+ *   final BytesRef bytes = termAtt.getBytesRef();
+ *
+ *   while (tokenStream.incrementToken()) {
+ *
+ *     // you must call termAtt.fillBytesRef() before doing something with the bytes.
+ *     // this encodes the term value (internally it might be a char[], etc) into the bytes.
+ *     int hashCode = termAtt.fillBytesRef();
+ *
+ *     if (isInteresting(bytes)) {
+ *
+ *       // because the bytes are reused by the attribute (like CharTermAttribute's char[] buffer),
+ *       // you should make a copy if you need persistent access to the bytes, otherwise they will
+ *       // be rewritten across calls to incrementToken()
+ *
+ *       doSomethingWith(new BytesRef(bytes));
+ *     }
+ *   }
+ *   ...
+ * </pre>
  * @lucene.experimental This is a very expert API, please use
  * {@link CharTermAttributeImpl} and its implementation of this method
  * for UTF-8 terms.
  */
public interface TermToBytesRefAttribute extends Attribute {
-  /** Copies the token's term text into the given {@link BytesRef}.
-   * @param termBytes destination to write the bytes to (UTF-8 for text terms).
-   * The length of the BytesRef's buffer may be not large enough, so you need to grow.
-   * The parameters' {@code bytes} is guaranteed to be not {@code null}.
+  /**
+   * Updates the bytes {@link #getBytesRef()} to contain this term's
+   * final encoding, and returns its hashcode.
   * @return the hashcode as defined by {@link BytesRef#hashCode}:
   * <pre>
   *  int hash = 0;
@@ -45,5 +66,12 @@ public interface TermToBytesRefAttribute extends Attribute {
   * the hash on-the-fly. If this is not the case, just return
   * {@code termBytes.hashCode()}.
   */
-  public int toBytesRef(BytesRef termBytes);
+  public int fillBytesRef();
+
+  /**
+   * Retrieve this attribute's BytesRef. The bytes are updated
+   * from the current term when the consumer calls {@link #fillBytesRef()}.
+   * @return this Attribute's internal BytesRef.
+   */
+  public BytesRef getBytesRef();
}

View File

@@ -39,6 +39,7 @@ final class TermsHashPerField extends InvertedDocConsumerPerField {
   final DocumentsWriter.DocState docState;
   final FieldInvertState fieldState;
   TermToBytesRefAttribute termAtt;
+  BytesRef termBytesRef;
 
   // Copied from our perThread
   final IntBlockPool intPool;
@@ -53,7 +54,6 @@ final class TermsHashPerField extends InvertedDocConsumerPerField {
   final BytesRefHash bytesHash;
 
   ParallelPostingsArray postingsArray;
-  private final BytesRef termBytesRef;
   private final AtomicLong bytesUsed;
 
   public TermsHashPerField(DocInverterPerField docInverterPerField, final TermsHashPerThread perThread, final TermsHashPerThread nextPerThread, final FieldInfo fieldInfo) {
@@ -70,7 +70,6 @@ final class TermsHashPerField extends InvertedDocConsumerPerField {
     bytesHash = new BytesRefHash(termBytePool, HASH_INIT_SIZE, byteStarts);
     streamCount = consumer.getStreamCount();
     numPostingInt = 2*streamCount;
-    termBytesRef = perThread.termBytesRef;
     this.fieldInfo = fieldInfo;
     if (nextPerThread != null)
       nextPerField = (TermsHashPerField) nextPerThread.addField(docInverterPerField, fieldInfo);
@@ -119,6 +118,7 @@ final class TermsHashPerField extends InvertedDocConsumerPerField {
   @Override
   void start(Fieldable f) {
     termAtt = fieldState.attributeSource.getAttribute(TermToBytesRefAttribute.class);
+    termBytesRef = termAtt.getBytesRef();
     consumer.start(f);
     if (nextPerField != null) {
       nextPerField.start(f);
@@ -181,7 +181,7 @@ final class TermsHashPerField extends InvertedDocConsumerPerField {
     // Get the text & hash of this term.
     int termID;
     try{
-      termID = bytesHash.add(termBytesRef, termAtt.toBytesRef(termBytesRef));
+      termID = bytesHash.add(termBytesRef, termAtt.fillBytesRef());
    }catch (MaxBytesLengthExceededException e) {
      // Not enough room in current block
      // Just skip this term, to remain as robust as

View File

@@ -18,7 +18,6 @@ package org.apache.lucene.index;
  */
 
 import org.apache.lucene.util.ByteBlockPool;
-import org.apache.lucene.util.BytesRef;
 
 import java.io.IOException;
@@ -35,8 +34,6 @@ final class TermsHashPerThread extends InvertedDocConsumerPerThread {
   final boolean primary;
   final DocumentsWriter.DocState docState;
 
-  // Used by perField to obtain terms from the analysis chain
-  final BytesRef termBytesRef = new BytesRef(10);
 
   public TermsHashPerThread(DocInverterPerThread docInverterPerThread, final TermsHash termsHash, final TermsHash nextTermsHash, final TermsHashPerThread primaryPerThread) {
     docState = docInverterPerThread.docState;

View File

@@ -532,18 +532,19 @@ public abstract class QueryParserBase {
       // ignore
     }
 
+    BytesRef bytes = termAtt == null ? null : termAtt.getBytesRef();
+
    if (numTokens == 0)
      return null;
    else if (numTokens == 1) {
-      BytesRef term = new BytesRef();
      try {
        boolean hasNext = buffer.incrementToken();
        assert hasNext == true;
-        termAtt.toBytesRef(term);
+        termAtt.fillBytesRef();
      } catch (IOException e) {
        // safe to ignore, because we know the number of tokens
      }
-      return newTermQuery(new Term(field, term));
+      return newTermQuery(new Term(field, new BytesRef(bytes)));
    } else {
      if (severalTokensAtSamePosition || (!quoted && !autoGeneratePhraseQueries)) {
        if (positionCount == 1 || (!quoted && !autoGeneratePhraseQueries)) {
@@ -554,17 +555,15 @@ public abstract class QueryParserBase {
                BooleanClause.Occur.MUST : BooleanClause.Occur.SHOULD;
 
          for (int i = 0; i < numTokens; i++) {
-            BytesRef term = new BytesRef();
            try {
              boolean hasNext = buffer.incrementToken();
              assert hasNext == true;
-              termAtt.toBytesRef(term);
+              termAtt.fillBytesRef();
            } catch (IOException e) {
              // safe to ignore, because we know the number of tokens
            }
 
            Query currentQuery = newTermQuery(
-                new Term(field, term));
+                new Term(field, new BytesRef(bytes)));
            q.add(currentQuery, occur);
          }
          return q;
@@ -576,12 +575,11 @@ public abstract class QueryParserBase {
          List<Term> multiTerms = new ArrayList<Term>();
          int position = -1;
          for (int i = 0; i < numTokens; i++) {
-            BytesRef term = new BytesRef();
            int positionIncrement = 1;
            try {
              boolean hasNext = buffer.incrementToken();
              assert hasNext == true;
-              termAtt.toBytesRef(term);
+              termAtt.fillBytesRef();
              if (posIncrAtt != null) {
                positionIncrement = posIncrAtt.getPositionIncrement();
              }
@@ -598,7 +596,7 @@ public abstract class QueryParserBase {
              multiTerms.clear();
            }
            position += positionIncrement;
-            multiTerms.add(new Term(field, term));
+            multiTerms.add(new Term(field, new BytesRef(bytes)));
          }
          if (enablePositionIncrements) {
            mpq.add(multiTerms.toArray(new Term[0]),position);
@@ -613,15 +611,13 @@ public abstract class QueryParserBase {
          pq.setSlop(phraseSlop);
          int position = -1;
 
          for (int i = 0; i < numTokens; i++) {
-            BytesRef term = new BytesRef();
            int positionIncrement = 1;
 
            try {
              boolean hasNext = buffer.incrementToken();
              assert hasNext == true;
-              termAtt.toBytesRef(term);
+              termAtt.fillBytesRef();
              if (posIncrAtt != null) {
                positionIncrement = posIncrAtt.getPositionIncrement();
              }
@@ -631,9 +627,9 @@ public abstract class QueryParserBase {
            if (enablePositionIncrements) {
              position += positionIncrement;
-              pq.add(new Term(field, term),position);
+              pq.add(new Term(field, new BytesRef(bytes)),position);
            } else {
-              pq.add(new Term(field, term));
+              pq.add(new Term(field, new BytesRef(bytes)));
            }
          }
          return pq;
@@ -796,13 +792,13 @@ public abstract class QueryParserBase {
      source = analyzer.tokenStream(field, new StringReader(part));
    }
 
-    BytesRef result = new BytesRef();
    TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class);
+    BytesRef bytes = termAtt.getBytesRef();
 
    try {
      if (!source.incrementToken())
        throw new IllegalArgumentException("analyzer returned no terms for range part: " + part);
-      termAtt.toBytesRef(result);
+      termAtt.fillBytesRef();
      if (source.incrementToken())
        throw new IllegalArgumentException("analyzer returned too many terms for range part: " + part);
    } catch (IOException e) {
@@ -812,8 +808,8 @@ public abstract class QueryParserBase {
    try {
      source.close();
    } catch (IOException ignored) {}
-    return result;
+    return new BytesRef(bytes);
  }
 
  /**

View File

@@ -66,10 +66,10 @@ public class QueryTermVector implements TermFreqVector {
          final TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
          hasMoreTokens = stream.incrementToken();
+          BytesRef bytes = termAtt.getBytesRef();
          while (hasMoreTokens) {
-            BytesRef bytes = new BytesRef();
-            termAtt.toBytesRef(bytes);
-            terms.add(bytes);
+            termAtt.fillBytesRef();
+            terms.add(new BytesRef(bytes));
            hasMoreTokens = stream.incrementToken();
          }
          processTerms(terms.toArray(new BytesRef[terms.size()]));

View File

@@ -35,13 +35,13 @@ public class TestNumericTokenStream extends BaseTokenStreamTestCase {
    final TermToBytesRefAttribute bytesAtt = stream.getAttribute(TermToBytesRefAttribute.class);
    final TypeAttribute typeAtt = stream.getAttribute(TypeAttribute.class);
    final NumericTokenStream.NumericTermAttribute numericAtt = stream.getAttribute(NumericTokenStream.NumericTermAttribute.class);
-    final BytesRef bytes = new BytesRef();
+    final BytesRef bytes = bytesAtt.getBytesRef();
    stream.reset();
    assertEquals(64, numericAtt.getValueSize());
    for (int shift=0; shift<64; shift+=NumericUtils.PRECISION_STEP_DEFAULT) {
      assertTrue("New token is available", stream.incrementToken());
      assertEquals("Shift value wrong", shift, numericAtt.getShift());
-      final int hash = bytesAtt.toBytesRef(bytes);
+      final int hash = bytesAtt.fillBytesRef();
      assertEquals("Hash incorrect", bytes.hashCode(), hash);
      assertEquals("Term is incorrectly encoded", lvalue & ~((1L << shift) - 1L), NumericUtils.prefixCodedToLong(bytes));
      assertEquals("Term raw value is incorrectly encoded", lvalue & ~((1L << shift) - 1L), numericAtt.getRawValue());
@@ -58,13 +58,13 @@ public class TestNumericTokenStream extends BaseTokenStreamTestCase {
    final TermToBytesRefAttribute bytesAtt = stream.getAttribute(TermToBytesRefAttribute.class);
    final TypeAttribute typeAtt = stream.getAttribute(TypeAttribute.class);
    final NumericTokenStream.NumericTermAttribute numericAtt = stream.getAttribute(NumericTokenStream.NumericTermAttribute.class);
-    final BytesRef bytes = new BytesRef();
+    final BytesRef bytes = bytesAtt.getBytesRef();
    stream.reset();
    assertEquals(32, numericAtt.getValueSize());
    for (int shift=0; shift<32; shift+=NumericUtils.PRECISION_STEP_DEFAULT) {
      assertTrue("New token is available", stream.incrementToken());
      assertEquals("Shift value wrong", shift, numericAtt.getShift());
-      final int hash = bytesAtt.toBytesRef(bytes);
+      final int hash = bytesAtt.fillBytesRef();
      assertEquals("Hash incorrect", bytes.hashCode(), hash);
      assertEquals("Term is incorrectly encoded", ivalue & ~((1 << shift) - 1), NumericUtils.prefixCodedToInt(bytes));
      assertEquals("Term raw value is incorrectly encoded", ((long) ivalue) & ~((1L << shift) - 1L), numericAtt.getRawValue());

View File

@@ -74,12 +74,14 @@ public class Test2BTerms extends LuceneTestCase {
  }
 
  private final static class MyTermAttributeImpl extends AttributeImpl implements TermToBytesRefAttribute {
-    public int toBytesRef(BytesRef bs) {
-      bs.bytes = bytes.bytes;
-      bs.offset = bytes.offset;
-      bs.length = bytes.length;
+    public int fillBytesRef() {
      return bytes.hashCode();
    }
 
+    public BytesRef getBytesRef() {
+      return bytes;
+    }
+
    @Override
    public void clear() {
    }

View File

@@ -40,11 +40,12 @@ public class CollatedTermAttributeImpl extends CharTermAttributeImpl {
  }
 
  @Override
-  public int toBytesRef(BytesRef target) {
-    target.bytes = collator.getCollationKey(toString()).toByteArray();
-    target.offset = 0;
-    target.length = target.bytes.length;
-    return target.hashCode();
+  public int fillBytesRef() {
+    BytesRef bytes = getBytesRef();
+    bytes.bytes = collator.getCollationKey(toString()).toByteArray();
+    bytes.offset = 0;
+    bytes.length = bytes.bytes.length;
+    return bytes.hashCode();
  }
}

View File

@@ -283,7 +283,6 @@ public abstract class CollationTestBase extends LuceneTestCase {
    int numTestPoints = 100;
    int numThreads = _TestUtil.nextInt(random, 3, 5);
    final HashMap<String,BytesRef> map = new HashMap<String,BytesRef>();
-    BytesRef spare = new BytesRef();
 
    // create a map<String,SortKey> up front.
    // then with multiple threads, generate sort keys for all the keys in the map
@@ -292,12 +291,13 @@ public abstract class CollationTestBase extends LuceneTestCase {
    for (int i = 0; i < numTestPoints; i++) {
      String term = randomString();
      TokenStream ts = analyzer.reusableTokenStream("fake", new StringReader(term));
-      TermToBytesRefAttribute bytes = ts.addAttribute(TermToBytesRefAttribute.class);
+      TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
+      BytesRef bytes = termAtt.getBytesRef();
      ts.reset();
      assertTrue(ts.incrementToken());
-      bytes.toBytesRef(spare);
+      termAtt.fillBytesRef();
      // ensure we make a copy of the actual bytes too
-      map.put(term, new BytesRef(spare));
+      map.put(term, new BytesRef(bytes));
    }
 
    Thread threads[] = new Thread[numThreads];
@@ -306,16 +306,16 @@ public abstract class CollationTestBase extends LuceneTestCase {
        @Override
        public void run() {
          try {
-            BytesRef spare = new BytesRef();
            for (Map.Entry<String,BytesRef> mapping : map.entrySet()) {
              String term = mapping.getKey();
              BytesRef expected = mapping.getValue();
              TokenStream ts = analyzer.reusableTokenStream("fake", new StringReader(term));
-              TermToBytesRefAttribute bytes = ts.addAttribute(TermToBytesRefAttribute.class);
+              TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
+              BytesRef bytes = termAtt.getBytesRef();
              ts.reset();
              assertTrue(ts.incrementToken());
-              bytes.toBytesRef(spare);
-              assertEquals(expected, spare);
+              termAtt.fillBytesRef();
+              assertEquals(expected, bytes);
            }
          } catch (IOException e) {
            throw new RuntimeException(e);

View File

@@ -30,7 +30,7 @@ import com.ibm.icu.text.RawCollationKey;
public class ICUCollatedTermAttributeImpl extends CharTermAttributeImpl {
  private final Collator collator;
  private final RawCollationKey key = new RawCollationKey();
 
  /**
   * Create a new ICUCollatedTermAttributeImpl
   * @param collator Collation key generator
@@ -43,13 +43,14 @@ public class ICUCollatedTermAttributeImpl extends CharTermAttributeImpl {
      throw new RuntimeException(e);
    }
  }
 
  @Override
-  public int toBytesRef(BytesRef target) {
+  public int fillBytesRef() {
+    BytesRef bytes = getBytesRef();
    collator.getRawCollationKey(toString(), key);
-    target.bytes = key.bytes;
-    target.offset = 0;
-    target.length = key.size;
-    return target.hashCode();
+    bytes.bytes = key.bytes;
+    bytes.offset = 0;
+    bytes.length = key.size;
+    return bytes.hashCode();
  }
}

View File

@@ -38,7 +38,6 @@ import org.apache.solr.common.SolrException;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.schema.FieldType;
-import org.apache.solr.util.ByteUtils;
 
import org.apache.noggit.CharArr;
@@ -141,12 +140,12 @@ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase {
    final Set<BytesRef> tokens = new HashSet<BytesRef>();
    final TokenStream tokenStream = analyzer.tokenStream("", new StringReader(query));
    final TermToBytesRefAttribute bytesAtt = tokenStream.getAttribute(TermToBytesRefAttribute.class);
+    final BytesRef bytes = bytesAtt.getBytesRef();
 
    try {
      tokenStream.reset();
      while (tokenStream.incrementToken()) {
-        final BytesRef bytes = new BytesRef();
-        bytesAtt.toBytesRef(bytes);
-        tokens.add(bytes);
+        bytesAtt.fillBytesRef();
+        tokens.add(new BytesRef(bytes));
      }
    } catch (IOException ioe) {
      throw new RuntimeException("Error occured while iterating over tokenstream", ioe);
@@ -236,12 +235,13 @@ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase {
    FieldType fieldType = context.getFieldType();
 
-    final BytesRef rawBytes = new BytesRef();
    final CharArr textBuf = new CharArr();
    for (int i = 0, c = tokens.size(); i < c; i++) {
      AttributeSource token = tokens.get(i);
      final NamedList<Object> tokenNamedList = new SimpleOrderedMap<Object>();
-      token.getAttribute(TermToBytesRefAttribute.class).toBytesRef(rawBytes);
+      final TermToBytesRefAttribute termAtt = token.getAttribute(TermToBytesRefAttribute.class);
+      BytesRef rawBytes = termAtt.getBytesRef();
+      termAtt.fillBytesRef();
 
      textBuf.reset();
      fieldType.indexedToReadable(rawBytes, textBuf);

View File

@@ -156,10 +156,10 @@
      TermToBytesRefAttribute bytesAtt = tstream.getAttribute(TermToBytesRefAttribute.class);
      tstream.reset();
      matches = new HashSet<BytesRef>();
+      final BytesRef bytes = bytesAtt.getBytesRef();
      while (tstream.incrementToken()) {
-        final BytesRef bytes = new BytesRef();
-        bytesAtt.toBytesRef(bytes);
-        matches.add(bytes);
+        bytesAtt.fillBytesRef();
+        matches.add(new BytesRef(bytes));
      }
    }
@@ -273,14 +273,17 @@
  }
 
  private static class Tok {
-    final BytesRef bytes = new BytesRef();
+    final BytesRef bytes;
    final String rawText, text;
    final int pos;
    final List<ReflectItem> reflected = new ArrayList<ReflectItem>();
 
    Tok(AttributeSource token, int pos, FieldType ft) {
      this.pos = pos;
-      token.getAttribute(TermToBytesRefAttribute.class).toBytesRef(bytes);
+      TermToBytesRefAttribute termAtt = token.getAttribute(TermToBytesRefAttribute.class);
+      BytesRef spare = termAtt.getBytesRef();
+      termAtt.fillBytesRef();
+      bytes = new BytesRef(spare);
      rawText = (token.hasAttribute(CharTermAttribute.class)) ?
        token.getAttribute(CharTermAttribute.class).toString() : null;
      final CharArr textBuf = new CharArr(bytes.length);