mirror of https://github.com/apache/lucene.git
LUCENE-2944: fix BytesRef reuse bugs, TermToBytesRefAttribute owns the bytes like other attributes
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1083784 13f79535-47bb-0310-9956-ffa450edef68
commit e67bf6b089
parent e45d28a8d3
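The change in a nutshell: consumers no longer pass their own `BytesRef` into `toBytesRef(BytesRef)`. The attribute now owns the bytes, so a consumer fetches the shared `BytesRef` once via `getBytesRef()`, calls `fillBytesRef()` per token, and copies with `new BytesRef(bytes)` whenever a term must outlive the loop. A minimal consumer sketch against the post-commit API (the `Analyzer`, field name, and `collectTerms` helper are illustrative, not part of this commit):

```java
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
import org.apache.lucene.util.BytesRef;

class TermCollector {
  // Hypothetical helper: collects every term the analyzer produces for one value.
  static List<BytesRef> collectTerms(Analyzer analyzer, String field, String value) throws IOException {
    List<BytesRef> terms = new ArrayList<BytesRef>();
    TokenStream ts = analyzer.tokenStream(field, new StringReader(value));
    TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
    // Fetch the attribute-owned BytesRef once, up front; it is reused for every token.
    BytesRef bytes = termAtt.getBytesRef();
    ts.reset();
    while (ts.incrementToken()) {
      // Encode the current term into the shared BytesRef (also computes its hash).
      termAtt.fillBytesRef();
      // The buffer is overwritten on the next incrementToken(), so copy to keep the term.
      terms.add(new BytesRef(bytes));
    }
    ts.close();
    return terms;
  }
}
```

The diffs below follow exactly this pattern: call sites that only inspect the current term use the shared ref directly, while anything that stores a term (a `Term`, a map value, a set entry) wraps it in `new BytesRef(bytes)` first.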
@@ -353,10 +353,10 @@ public class MemoryIndex {
       TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
       PositionIncrementAttribute posIncrAttribute = stream.addAttribute(PositionIncrementAttribute.class);
       OffsetAttribute offsetAtt = stream.addAttribute(OffsetAttribute.class);
-      BytesRef ref = new BytesRef(10);
+      BytesRef ref = termAtt.getBytesRef();
       stream.reset();
       while (stream.incrementToken()) {
-        termAtt.toBytesRef(ref);
+        termAtt.fillBytesRef();
         if (ref.length == 0) continue; // nothing to do
 //        if (DEBUG) System.err.println("token='" + term + "'");
         numTokens++;
@@ -58,11 +58,10 @@ public class SpanOrTermsBuilder extends SpanBuilderBase
 			ArrayList<SpanQuery> clausesList=new ArrayList<SpanQuery>();
 			TokenStream ts=analyzer.tokenStream(fieldName,new StringReader(value));
 			TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
-
+			BytesRef bytes = termAtt.getBytesRef();
 			while (ts.incrementToken()) {
-				BytesRef term = new BytesRef();
-				termAtt.toBytesRef(term);
-				SpanTermQuery stq=new SpanTermQuery(new Term(fieldName, term));
+				termAtt.fillBytesRef();
+				SpanTermQuery stq=new SpanTermQuery(new Term(fieldName, new BytesRef(bytes)));
 				clausesList.add(stq);
 			}
 			SpanOrQuery soq=new SpanOrQuery(clausesList.toArray(new SpanQuery[clausesList.size()]));
@@ -63,16 +63,16 @@ public class TermsFilterBuilder implements FilterBuilder
 		try
 		{
 			Term term = null;
+			BytesRef bytes = termAtt.getBytesRef();
 			while (ts.incrementToken()) {
-				BytesRef bytes = new BytesRef();
-				termAtt.toBytesRef(bytes);
+				termAtt.fillBytesRef();
 				if (term == null)
 				{
-					term = new Term(fieldName, bytes);
+					term = new Term(fieldName, new BytesRef(bytes));
 				} else
 				{
 					// create from previous to save fieldName.intern overhead
-					term = term.createTerm(bytes);
+					term = term.createTerm(new BytesRef(bytes));
 				}
 				tf.addTerm(term);
 			}
@@ -60,16 +60,16 @@ public class TermsQueryBuilder implements QueryBuilder {
 		{
 			TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
 			Term term = null;
+			BytesRef bytes = termAtt.getBytesRef();
 			while (ts.incrementToken()) {
-				BytesRef bytes = new BytesRef();
-				termAtt.toBytesRef(bytes);
+				termAtt.fillBytesRef();
 				if (term == null)
 				{
-					term = new Term(fieldName, bytes);
+					term = new Term(fieldName, new BytesRef(bytes));
 				} else
 				{
 					// create from previous to save fieldName.intern overhead
-					term = term.createTerm(bytes);
+					term = term.createTerm(new BytesRef(bytes));
 				}
 				bq.add(new BooleanClause(new TermQuery(term),BooleanClause.Occur.SHOULD));
 			}
@@ -142,8 +142,13 @@ public final class NumericTokenStream extends TokenStream {
   public static final class NumericTermAttributeImpl extends AttributeImpl implements NumericTermAttribute,TermToBytesRefAttribute {
     private long value = 0L;
     private int valueSize = 0, shift = 0, precisionStep = 0;
+    private BytesRef bytes = new BytesRef();
+
+    public BytesRef getBytesRef() {
+      return bytes;
+    }

-    public int toBytesRef(BytesRef bytes) {
+    public int fillBytesRef() {
       try {
         assert valueSize == 64 || valueSize == 32;
         return (valueSize == 64) ?

@@ -180,8 +185,7 @@ public final class NumericTokenStream extends TokenStream {

     @Override
     public void reflectWith(AttributeReflector reflector) {
-      final BytesRef bytes = new BytesRef();
-      toBytesRef(bytes);
+      fillBytesRef();
       reflector.reflect(TermToBytesRefAttribute.class, "bytes", bytes);
       reflector.reflect(NumericTermAttribute.class, "shift", shift);
       reflector.reflect(NumericTermAttribute.class, "rawValue", getRawValue());
@@ -77,8 +77,16 @@ public class CharTermAttributeImpl extends AttributeImpl implements CharTermAttr
   }

   // *** TermToBytesRefAttribute interface ***
-  public int toBytesRef(BytesRef target) {
-    return UnicodeUtil.UTF16toUTF8WithHash(termBuffer, 0, termLength, target);
+  private BytesRef bytes = new BytesRef(MIN_BUFFER_SIZE);
+
+  @Override
+  public int fillBytesRef() {
+    return UnicodeUtil.UTF16toUTF8WithHash(termBuffer, 0, termLength, bytes);
+  }
+
+  @Override
+  public BytesRef getBytesRef() {
+    return bytes;
   }

   // *** CharSequence interface ***

@@ -205,6 +213,7 @@ public class CharTermAttributeImpl extends AttributeImpl implements CharTermAttr
     // Do a deep clone
     t.termBuffer = new char[this.termLength];
     System.arraycopy(this.termBuffer, 0, t.termBuffer, 0, this.termLength);
+    t.bytes = new BytesRef(bytes);
     return t;
   }

@@ -246,9 +255,8 @@ public class CharTermAttributeImpl extends AttributeImpl implements CharTermAttr
   @Override
   public void reflectWith(AttributeReflector reflector) {
     reflector.reflect(CharTermAttribute.class, "term", toString());
-    final BytesRef bytes = new BytesRef();
-    toBytesRef(bytes);
-    reflector.reflect(TermToBytesRefAttribute.class, "bytes", bytes);
+    fillBytesRef();
+    reflector.reflect(TermToBytesRefAttribute.class, "bytes", new BytesRef(bytes));
   }

   @Override
@@ -22,18 +22,39 @@ import org.apache.lucene.util.BytesRef;

 /**
  * This attribute is requested by TermsHashPerField to index the contents.
- * This attribute has no real state, it should be implemented in addition to
- * {@link CharTermAttribute}, to support indexing the term text as
- * UTF-8 bytes.
+ * This attribute can be used to customize the final byte[] encoding of terms.
+ * <p>
+ * Consumers of this attribute call {@link #getBytesRef()} up-front, and then
+ * invoke {@link #fillBytesRef()} for each term. Example:
+ * <pre class="prettyprint">
+ *   final TermToBytesRefAttribute termAtt = tokenStream.getAttribute(TermToBytesRefAttribute.class);
+ *   final BytesRef bytes = termAtt.getBytesRef();
+ *
+ *   while (tokenStream.incrementToken()) {
+ *
+ *     // you must call termAtt.fillBytesRef() before doing something with the bytes.
+ *     // this encodes the term value (internally it might be a char[], etc) into the bytes.
+ *     int hashCode = termAtt.fillBytesRef();
+ *
+ *     if (isInteresting(bytes)) {
+ *
+ *       // because the bytes are reused by the attribute (like CharTermAttribute's char[] buffer),
+ *       // you should make a copy if you need persistent access to the bytes, otherwise they will
+ *       // be rewritten across calls to incrementToken()
+ *
+ *       doSomethingWith(new BytesRef(bytes));
+ *     }
+ *   }
+ *   ...
+ * </pre>
  * @lucene.experimental This is a very expert API, please use
  * {@link CharTermAttributeImpl} and its implementation of this method
  * for UTF-8 terms.
  */
 public interface TermToBytesRefAttribute extends Attribute {
-  /** Copies the token's term text into the given {@link BytesRef}.
-   * @param termBytes destination to write the bytes to (UTF-8 for text terms).
-   * The length of the BytesRef's buffer may be not large enough, so you need to grow.
-   * The parameters' {@code bytes} is guaranteed to be not {@code null}.
+  /**
+   * Updates the bytes {@link #getBytesRef()} to contain this term's
+   * final encoding, and returns its hashcode.
    * @return the hashcode as defined by {@link BytesRef#hashCode}:
    * <pre>
    * int hash = 0;

@@ -45,5 +66,12 @@ public interface TermToBytesRefAttribute extends Attribute {
   * the hash on-the-fly. If this is not the case, just return
   * {@code termBytes.hashCode()}.
   */
-  public int toBytesRef(BytesRef termBytes);
+  public int fillBytesRef();
+
+  /**
+   * Retrieve this attribute's BytesRef. The bytes are updated
+   * from the current term when the consumer calls {@link #fillBytesRef()}.
+   * @return this Attribute's internal BytesRef.
+   */
+  public BytesRef getBytesRef();
 }
@@ -39,6 +39,7 @@ final class TermsHashPerField extends InvertedDocConsumerPerField {
   final DocumentsWriter.DocState docState;
   final FieldInvertState fieldState;
   TermToBytesRefAttribute termAtt;
+  BytesRef termBytesRef;

   // Copied from our perThread
   final IntBlockPool intPool;

@@ -53,7 +54,6 @@ final class TermsHashPerField extends InvertedDocConsumerPerField {
   final BytesRefHash bytesHash;

   ParallelPostingsArray postingsArray;
-  private final BytesRef termBytesRef;
   private final AtomicLong bytesUsed;

   public TermsHashPerField(DocInverterPerField docInverterPerField, final TermsHashPerThread perThread, final TermsHashPerThread nextPerThread, final FieldInfo fieldInfo) {

@@ -70,7 +70,6 @@ final class TermsHashPerField extends InvertedDocConsumerPerField {
     bytesHash = new BytesRefHash(termBytePool, HASH_INIT_SIZE, byteStarts);
     streamCount = consumer.getStreamCount();
     numPostingInt = 2*streamCount;
-    termBytesRef = perThread.termBytesRef;
     this.fieldInfo = fieldInfo;
     if (nextPerThread != null)
       nextPerField = (TermsHashPerField) nextPerThread.addField(docInverterPerField, fieldInfo);

@@ -119,6 +118,7 @@ final class TermsHashPerField extends InvertedDocConsumerPerField {
   @Override
   void start(Fieldable f) {
     termAtt = fieldState.attributeSource.getAttribute(TermToBytesRefAttribute.class);
+    termBytesRef = termAtt.getBytesRef();
     consumer.start(f);
     if (nextPerField != null) {
       nextPerField.start(f);

@@ -181,7 +181,7 @@ final class TermsHashPerField extends InvertedDocConsumerPerField {
     // Get the text & hash of this term.
     int termID;
     try{
-      termID = bytesHash.add(termBytesRef, termAtt.toBytesRef(termBytesRef));
+      termID = bytesHash.add(termBytesRef, termAtt.fillBytesRef());
     }catch (MaxBytesLengthExceededException e) {
       // Not enough room in current block
       // Just skip this term, to remain as robust as
@@ -18,7 +18,6 @@ package org.apache.lucene.index;
  */

 import org.apache.lucene.util.ByteBlockPool;
-import org.apache.lucene.util.BytesRef;

 import java.io.IOException;

@@ -35,8 +34,6 @@ final class TermsHashPerThread extends InvertedDocConsumerPerThread {

   final boolean primary;
   final DocumentsWriter.DocState docState;
-  // Used by perField to obtain terms from the analysis chain
-  final BytesRef termBytesRef = new BytesRef(10);

   public TermsHashPerThread(DocInverterPerThread docInverterPerThread, final TermsHash termsHash, final TermsHash nextTermsHash, final TermsHashPerThread primaryPerThread) {
     docState = docInverterPerThread.docState;
@@ -532,18 +532,19 @@ public abstract class QueryParserBase {
       // ignore
     }

+    BytesRef bytes = termAtt == null ? null : termAtt.getBytesRef();
+
     if (numTokens == 0)
       return null;
     else if (numTokens == 1) {
-      BytesRef term = new BytesRef();
       try {
         boolean hasNext = buffer.incrementToken();
         assert hasNext == true;
-        termAtt.toBytesRef(term);
+        termAtt.fillBytesRef();
       } catch (IOException e) {
         // safe to ignore, because we know the number of tokens
       }
-      return newTermQuery(new Term(field, term));
+      return newTermQuery(new Term(field, new BytesRef(bytes)));
     } else {
       if (severalTokensAtSamePosition || (!quoted && !autoGeneratePhraseQueries)) {
         if (positionCount == 1 || (!quoted && !autoGeneratePhraseQueries)) {

@@ -554,17 +555,15 @@ public abstract class QueryParserBase {
               BooleanClause.Occur.MUST : BooleanClause.Occur.SHOULD;

           for (int i = 0; i < numTokens; i++) {
-            BytesRef term = new BytesRef();
             try {
               boolean hasNext = buffer.incrementToken();
               assert hasNext == true;
-              termAtt.toBytesRef(term);
+              termAtt.fillBytesRef();
             } catch (IOException e) {
               // safe to ignore, because we know the number of tokens
             }
-
             Query currentQuery = newTermQuery(
-                new Term(field, term));
+                new Term(field, new BytesRef(bytes)));
             q.add(currentQuery, occur);
           }
           return q;

@@ -576,12 +575,11 @@ public abstract class QueryParserBase {
           List<Term> multiTerms = new ArrayList<Term>();
           int position = -1;
           for (int i = 0; i < numTokens; i++) {
-            BytesRef term = new BytesRef();
             int positionIncrement = 1;
             try {
               boolean hasNext = buffer.incrementToken();
               assert hasNext == true;
-              termAtt.toBytesRef(term);
+              termAtt.fillBytesRef();
               if (posIncrAtt != null) {
                 positionIncrement = posIncrAtt.getPositionIncrement();
               }

@@ -598,7 +596,7 @@ public abstract class QueryParserBase {
               multiTerms.clear();
             }
             position += positionIncrement;
-            multiTerms.add(new Term(field, term));
+            multiTerms.add(new Term(field, new BytesRef(bytes)));
           }
           if (enablePositionIncrements) {
             mpq.add(multiTerms.toArray(new Term[0]),position);

@@ -613,15 +611,13 @@ public abstract class QueryParserBase {
           pq.setSlop(phraseSlop);
           int position = -1;

-
           for (int i = 0; i < numTokens; i++) {
-            BytesRef term = new BytesRef();
             int positionIncrement = 1;

             try {
               boolean hasNext = buffer.incrementToken();
               assert hasNext == true;
-              termAtt.toBytesRef(term);
+              termAtt.fillBytesRef();
               if (posIncrAtt != null) {
                 positionIncrement = posIncrAtt.getPositionIncrement();
               }

@@ -631,9 +627,9 @@ public abstract class QueryParserBase {

             if (enablePositionIncrements) {
               position += positionIncrement;
-              pq.add(new Term(field, term),position);
+              pq.add(new Term(field, new BytesRef(bytes)),position);
             } else {
-              pq.add(new Term(field, term));
+              pq.add(new Term(field, new BytesRef(bytes)));
             }
           }
           return pq;

@@ -796,13 +792,13 @@ public abstract class QueryParserBase {
       source = analyzer.tokenStream(field, new StringReader(part));
     }

-    BytesRef result = new BytesRef();
     TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class);
+    BytesRef bytes = termAtt.getBytesRef();

     try {
       if (!source.incrementToken())
         throw new IllegalArgumentException("analyzer returned no terms for range part: " + part);
-      termAtt.toBytesRef(result);
+      termAtt.fillBytesRef();
       if (source.incrementToken())
         throw new IllegalArgumentException("analyzer returned too many terms for range part: " + part);
     } catch (IOException e) {

@@ -812,8 +808,8 @@ public abstract class QueryParserBase {
     try {
       source.close();
     } catch (IOException ignored) {}

-    return result;
+    return new BytesRef(bytes);
   }

   /**
@@ -66,10 +66,10 @@ public class QueryTermVector implements TermFreqVector {
         final TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);

         hasMoreTokens = stream.incrementToken();
+        BytesRef bytes = termAtt.getBytesRef();
         while (hasMoreTokens) {
-          BytesRef bytes = new BytesRef();
-          termAtt.toBytesRef(bytes);
-          terms.add(bytes);
+          termAtt.fillBytesRef();
+          terms.add(new BytesRef(bytes));
           hasMoreTokens = stream.incrementToken();
         }
         processTerms(terms.toArray(new BytesRef[terms.size()]));
@@ -35,13 +35,13 @@ public class TestNumericTokenStream extends BaseTokenStreamTestCase {
     final TermToBytesRefAttribute bytesAtt = stream.getAttribute(TermToBytesRefAttribute.class);
     final TypeAttribute typeAtt = stream.getAttribute(TypeAttribute.class);
     final NumericTokenStream.NumericTermAttribute numericAtt = stream.getAttribute(NumericTokenStream.NumericTermAttribute.class);
-    final BytesRef bytes = new BytesRef();
+    final BytesRef bytes = bytesAtt.getBytesRef();
     stream.reset();
     assertEquals(64, numericAtt.getValueSize());
     for (int shift=0; shift<64; shift+=NumericUtils.PRECISION_STEP_DEFAULT) {
       assertTrue("New token is available", stream.incrementToken());
       assertEquals("Shift value wrong", shift, numericAtt.getShift());
-      final int hash = bytesAtt.toBytesRef(bytes);
+      final int hash = bytesAtt.fillBytesRef();
       assertEquals("Hash incorrect", bytes.hashCode(), hash);
       assertEquals("Term is incorrectly encoded", lvalue & ~((1L << shift) - 1L), NumericUtils.prefixCodedToLong(bytes));
       assertEquals("Term raw value is incorrectly encoded", lvalue & ~((1L << shift) - 1L), numericAtt.getRawValue());

@@ -58,13 +58,13 @@ public class TestNumericTokenStream extends BaseTokenStreamTestCase {
     final TermToBytesRefAttribute bytesAtt = stream.getAttribute(TermToBytesRefAttribute.class);
     final TypeAttribute typeAtt = stream.getAttribute(TypeAttribute.class);
     final NumericTokenStream.NumericTermAttribute numericAtt = stream.getAttribute(NumericTokenStream.NumericTermAttribute.class);
-    final BytesRef bytes = new BytesRef();
+    final BytesRef bytes = bytesAtt.getBytesRef();
     stream.reset();
     assertEquals(32, numericAtt.getValueSize());
     for (int shift=0; shift<32; shift+=NumericUtils.PRECISION_STEP_DEFAULT) {
       assertTrue("New token is available", stream.incrementToken());
       assertEquals("Shift value wrong", shift, numericAtt.getShift());
-      final int hash = bytesAtt.toBytesRef(bytes);
+      final int hash = bytesAtt.fillBytesRef();
       assertEquals("Hash incorrect", bytes.hashCode(), hash);
       assertEquals("Term is incorrectly encoded", ivalue & ~((1 << shift) - 1), NumericUtils.prefixCodedToInt(bytes));
       assertEquals("Term raw value is incorrectly encoded", ((long) ivalue) & ~((1L << shift) - 1L), numericAtt.getRawValue());
@@ -74,12 +74,14 @@ public class Test2BTerms extends LuceneTestCase {
   }

   private final static class MyTermAttributeImpl extends AttributeImpl implements TermToBytesRefAttribute {
-    public int toBytesRef(BytesRef bs) {
-      bs.bytes = bytes.bytes;
-      bs.offset = bytes.offset;
-      bs.length = bytes.length;
+    public int fillBytesRef() {
       return bytes.hashCode();
     }
+
+    public BytesRef getBytesRef() {
+      return bytes;
+    }
+
     @Override
     public void clear() {
     }
@@ -40,11 +40,12 @@ public class CollatedTermAttributeImpl extends CharTermAttributeImpl {
   }

   @Override
-  public int toBytesRef(BytesRef target) {
-    target.bytes = collator.getCollationKey(toString()).toByteArray();
-    target.offset = 0;
-    target.length = target.bytes.length;
-    return target.hashCode();
+  public int fillBytesRef() {
+    BytesRef bytes = getBytesRef();
+    bytes.bytes = collator.getCollationKey(toString()).toByteArray();
+    bytes.offset = 0;
+    bytes.length = bytes.bytes.length;
+    return bytes.hashCode();
   }

 }
@@ -283,7 +283,6 @@ public abstract class CollationTestBase extends LuceneTestCase {
     int numTestPoints = 100;
     int numThreads = _TestUtil.nextInt(random, 3, 5);
     final HashMap<String,BytesRef> map = new HashMap<String,BytesRef>();
-    BytesRef spare = new BytesRef();

     // create a map<String,SortKey> up front.
     // then with multiple threads, generate sort keys for all the keys in the map

@@ -292,12 +291,13 @@ public abstract class CollationTestBase extends LuceneTestCase {
     for (int i = 0; i < numTestPoints; i++) {
       String term = randomString();
       TokenStream ts = analyzer.reusableTokenStream("fake", new StringReader(term));
-      TermToBytesRefAttribute bytes = ts.addAttribute(TermToBytesRefAttribute.class);
+      TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
+      BytesRef bytes = termAtt.getBytesRef();
       ts.reset();
       assertTrue(ts.incrementToken());
-      bytes.toBytesRef(spare);
+      termAtt.fillBytesRef();
       // ensure we make a copy of the actual bytes too
-      map.put(term, new BytesRef(spare));
+      map.put(term, new BytesRef(bytes));
     }

     Thread threads[] = new Thread[numThreads];

@@ -306,16 +306,16 @@ public abstract class CollationTestBase extends LuceneTestCase {
       @Override
       public void run() {
         try {
-          BytesRef spare = new BytesRef();
           for (Map.Entry<String,BytesRef> mapping : map.entrySet()) {
             String term = mapping.getKey();
             BytesRef expected = mapping.getValue();
             TokenStream ts = analyzer.reusableTokenStream("fake", new StringReader(term));
-            TermToBytesRefAttribute bytes = ts.addAttribute(TermToBytesRefAttribute.class);
+            TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
+            BytesRef bytes = termAtt.getBytesRef();
             ts.reset();
             assertTrue(ts.incrementToken());
-            bytes.toBytesRef(spare);
-            assertEquals(expected, spare);
+            termAtt.fillBytesRef();
+            assertEquals(expected, bytes);
           }
         } catch (IOException e) {
           throw new RuntimeException(e);
@@ -30,7 +30,7 @@ import com.ibm.icu.text.RawCollationKey;
 public class ICUCollatedTermAttributeImpl extends CharTermAttributeImpl {
   private final Collator collator;
   private final RawCollationKey key = new RawCollationKey();

   /**
    * Create a new ICUCollatedTermAttributeImpl
    * @param collator Collation key generator

@@ -43,13 +43,14 @@ public class ICUCollatedTermAttributeImpl extends CharTermAttributeImpl {
       throw new RuntimeException(e);
     }
   }

   @Override
-  public int toBytesRef(BytesRef target) {
+  public int fillBytesRef() {
+    BytesRef bytes = getBytesRef();
     collator.getRawCollationKey(toString(), key);
-    target.bytes = key.bytes;
-    target.offset = 0;
-    target.length = key.size;
-    return target.hashCode();
+    bytes.bytes = key.bytes;
+    bytes.offset = 0;
+    bytes.length = key.size;
+    return bytes.hashCode();
   }
 }
@@ -38,7 +38,6 @@ import org.apache.solr.common.SolrException;
 import org.apache.solr.request.SolrQueryRequest;
 import org.apache.solr.response.SolrQueryResponse;
 import org.apache.solr.schema.FieldType;
-import org.apache.solr.util.ByteUtils;

 import org.apache.noggit.CharArr;

@@ -141,12 +140,12 @@ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase {
     final Set<BytesRef> tokens = new HashSet<BytesRef>();
     final TokenStream tokenStream = analyzer.tokenStream("", new StringReader(query));
     final TermToBytesRefAttribute bytesAtt = tokenStream.getAttribute(TermToBytesRefAttribute.class);
+    final BytesRef bytes = bytesAtt.getBytesRef();
     try {
       tokenStream.reset();
       while (tokenStream.incrementToken()) {
-        final BytesRef bytes = new BytesRef();
-        bytesAtt.toBytesRef(bytes);
-        tokens.add(bytes);
+        bytesAtt.fillBytesRef();
+        tokens.add(new BytesRef(bytes));
       }
     } catch (IOException ioe) {
       throw new RuntimeException("Error occured while iterating over tokenstream", ioe);

@@ -236,12 +235,13 @@ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase {

     FieldType fieldType = context.getFieldType();

-    final BytesRef rawBytes = new BytesRef();
     final CharArr textBuf = new CharArr();
     for (int i = 0, c = tokens.size(); i < c; i++) {
       AttributeSource token = tokens.get(i);
       final NamedList<Object> tokenNamedList = new SimpleOrderedMap<Object>();
-      token.getAttribute(TermToBytesRefAttribute.class).toBytesRef(rawBytes);
+      final TermToBytesRefAttribute termAtt = token.getAttribute(TermToBytesRefAttribute.class);
+      BytesRef rawBytes = termAtt.getBytesRef();
+      termAtt.fillBytesRef();

       textBuf.reset();
       fieldType.indexedToReadable(rawBytes, textBuf);
@@ -156,10 +156,10 @@
     TermToBytesRefAttribute bytesAtt = tstream.getAttribute(TermToBytesRefAttribute.class);
     tstream.reset();
     matches = new HashSet<BytesRef>();
+    final BytesRef bytes = bytesAtt.getBytesRef();
     while (tstream.incrementToken()) {
-      final BytesRef bytes = new BytesRef();
-      bytesAtt.toBytesRef(bytes);
-      matches.add(bytes);
+      bytesAtt.fillBytesRef();
+      matches.add(new BytesRef(bytes));
     }
   }

@@ -273,14 +273,17 @@
   }

   private static class Tok {
-    final BytesRef bytes = new BytesRef();
+    final BytesRef bytes;
     final String rawText, text;
     final int pos;
     final List<ReflectItem> reflected = new ArrayList<ReflectItem>();

     Tok(AttributeSource token, int pos, FieldType ft) {
       this.pos = pos;
-      token.getAttribute(TermToBytesRefAttribute.class).toBytesRef(bytes);
+      TermToBytesRefAttribute termAtt = token.getAttribute(TermToBytesRefAttribute.class);
+      BytesRef spare = termAtt.getBytesRef();
+      termAtt.fillBytesRef();
+      bytes = new BytesRef(spare);
       rawText = (token.hasAttribute(CharTermAttribute.class)) ?
         token.getAttribute(CharTermAttribute.class).toString() : null;
       final CharArr textBuf = new CharArr(bytes.length);
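On the implementation side, the pattern in the CharTermAttributeImpl, CollatedTermAttributeImpl, and ICUCollatedTermAttributeImpl hunks above generalizes: the attribute keeps one private `BytesRef`, hands it out through `getBytesRef()`, and rewrites it in place in `fillBytesRef()`, returning the hash. A hypothetical subclass sketch to illustrate the contract (`UpperCaseTermAttributeImpl` is not part of this commit):

```java
import java.util.Locale;

import org.apache.lucene.analysis.tokenattributes.CharTermAttributeImpl;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.UnicodeUtil;

// Hypothetical example, not part of this commit: encodes each term upper-cased.
public class UpperCaseTermAttributeImpl extends CharTermAttributeImpl {
  @Override
  public int fillBytesRef() {
    // getBytesRef() hands out the attribute-owned buffer; fillBytesRef() rewrites it in place.
    BytesRef bytes = getBytesRef();
    char[] upper = toString().toUpperCase(Locale.ENGLISH).toCharArray();
    // UTF16toUTF8WithHash grows the target as needed and returns the BytesRef hashcode.
    return UnicodeUtil.UTF16toUTF8WithHash(upper, 0, upper.length, bytes);
  }
}
```

Because indexing calls `fillBytesRef()` exactly once per token and reuses the returned hash (see the `bytesHash.add(termBytesRef, termAtt.fillBytesRef())` hunk in TermsHashPerField), implementations should compute the hash during encoding when they can, rather than hashing the bytes a second time.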