LUCENE-1062: Add setData(byte[] data), setData(byte[] data, int offset, int length), getData(), getOffset() and clone() methods to o.a.l.index.Payload. Also add the field name as arg to Similarity.scorePayload().

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@597741 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael Busch 2007-11-23 21:17:32 +00:00
parent f7ee9f6dfb
commit 54e5550c4d
10 changed files with 95 additions and 75 deletions

View File

@ -63,6 +63,11 @@ API Changes
7. LUCENE-743: Add IndexReader.reopen() method that re-opens an 7. LUCENE-743: Add IndexReader.reopen() method that re-opens an
existing IndexReader (see New features -> 9.) (Michael Busch) existing IndexReader (see New features -> 9.) (Michael Busch)
8. LUCENE-1062: Add setData(byte[] data),
setData(byte[] data, int offset, int length), getData(), getOffset()
and clone() methods to o.a.l.index.Payload. Also add the field name
as arg to Similarity.scorePayload(). (Michael Busch)
Bug fixes Bug fixes
1. LUCENE-933: QueryParser fixed to not produce empty sub 1. LUCENE-933: QueryParser fixed to not produce empty sub

View File

@ -73,9 +73,6 @@ import org.apache.lucene.index.TermPositions;
@see org.apache.lucene.index.Payload @see org.apache.lucene.index.Payload
*/ */
// TODO: Remove warning after API has been finalized
public class Token implements Cloneable { public class Token implements Cloneable {
private static final String DEFAULT_TYPE = "word"; private static final String DEFAULT_TYPE = "word";
@ -322,24 +319,14 @@ public class Token implements Cloneable {
/** /**
* Returns this Token's payload. * Returns this Token's payload.
* <p><font color="#FF0000">
* WARNING: The status of the <b>Payloads</b> feature is experimental.
* The APIs introduced here might change in the future and will not be
* supported anymore in such a case.</font>
*/ */
// TODO: Remove warning after API has been finalized
public Payload getPayload() { public Payload getPayload() {
return this.payload; return this.payload;
} }
/** /**
* Sets this Token's payload. * Sets this Token's payload.
* <p><font color="#FF0000">
* WARNING: The status of the <b>Payloads</b> feature is experimental.
* The APIs introduced here might change in the future and will not be
* supported anymore in such a case.</font>
*/ */
// TODO: Remove warning after API has been finalized
public void setPayload(Payload payload) { public void setPayload(Payload payload) {
this.payload = payload; this.payload = payload;
} }
@ -381,6 +368,9 @@ public class Token implements Cloneable {
t.termBuffer = null; t.termBuffer = null;
t.setTermBuffer(termBuffer, 0, termLength); t.setTermBuffer(termBuffer, 0, termLength);
} }
if (payload != null) {
t.setPayload((Payload) payload.clone());
}
return t; return t;
} catch (CloneNotSupportedException e) { } catch (CloneNotSupportedException e) {
throw new RuntimeException(e); // shouldn't happen throw new RuntimeException(e); // shouldn't happen

View File

@ -46,8 +46,9 @@ public abstract class TokenStream {
if (result != null) { if (result != null) {
Payload p = result.getPayload(); Payload p = result.getPayload();
if (p != null) if (p != null) {
result.setPayload(new Payload(p.toByteArray(), 0, p.length())); result.setPayload((Payload) p.clone());
}
} }
return result; return result;
@ -74,7 +75,7 @@ public abstract class TokenStream {
* implement this method. Reset() is not needed for * implement this method. Reset() is not needed for
* the standard indexing process. However, if the Tokens * the standard indexing process. However, if the Tokens
* of a TokenStream are intended to be consumed more than * of a TokenStream are intended to be consumed more than
* once, it is neccessary to implement reset(). * once, it is necessary to implement reset().
*/ */
public void reset() throws IOException {} public void reset() throws IOException {}

View File

@ -32,15 +32,9 @@ import org.apache.lucene.analysis.TokenStream;
* <p> * <p>
* Use {@link TermPositions#getPayloadLength()} and {@link TermPositions#getPayload(byte[], int)} * Use {@link TermPositions#getPayloadLength()} and {@link TermPositions#getPayload(byte[], int)}
* to retrieve the payloads from the index.<br> * to retrieve the payloads from the index.<br>
* <br>
* *
* <p><font color="#FF0000">
* WARNING: The status of the <b>Payloads</b> feature is experimental.
* The APIs introduced here might change in the future and will not be
* supported anymore in such a case.</font>
*/ */
// TODO: Remove warning after API has been finalized public class Payload implements Serializable, Cloneable {
public class Payload implements Serializable {
/** the byte array containing the payload data */ /** the byte array containing the payload data */
protected byte[] data; protected byte[] data;
@ -51,12 +45,14 @@ import org.apache.lucene.analysis.TokenStream;
protected int length; protected int length;
/** Creates an empty payload and does not allocate a byte array. */ /** Creates an empty payload and does not allocate a byte array. */
protected Payload() { public Payload() {
// no-arg constructor since this class implements Serializable // nothing to do
} }
/** /**
* Creates a new payload with the given array as data. * Creates a new payload with the given array as data.
* A reference to the passed-in array is held, i.e. no
* copy is made.
* *
* @param data the data of this payload * @param data the data of this payload
*/ */
@ -66,6 +62,8 @@ import org.apache.lucene.analysis.TokenStream;
/** /**
* Creates a new payload with the given array as data. * Creates a new payload with the given array as data.
* A reference to the passed-in array is held, i.e. no
* copy is made.
* *
* @param data the data of this payload * @param data the data of this payload
* @param offset the offset in the data byte array * @param offset the offset in the data byte array
@ -80,6 +78,41 @@ import org.apache.lucene.analysis.TokenStream;
this.length = length; this.length = length;
} }
/**
 * Replaces the data of this payload with the whole of the given array.
 * The array is not copied; this payload keeps a reference to it.
 *
 * @param data the byte array that becomes this payload's data
 */
public void setData(byte[] data) {
  final int wholeLength = data.length;
  setData(data, 0, wholeLength);
}
/**
 * Points this payload's data at a region of the given array.
 * The array is not copied; this payload keeps a reference to it.
 *
 * @param data the byte array holding the payload bytes
 * @param offset the offset in the data byte array
 * @param length the length of the payload data
 */
public void setData(byte[] data, int offset, int length) {
  this.length = length;
  this.offset = offset;
  this.data = data;
}
/**
 * Gives direct access to the byte array backing this payload.
 * The array itself is returned, not a copy.
 *
 * @return the underlying byte array
 */
public byte[] getData() {
  return data;
}
/**
 * Reports the offset of this payload within the underlying byte array.
 *
 * @return the offset in the underlying byte array
 */
public int getOffset() {
  return offset;
}
/** /**
* Returns the length of the payload data. * Returns the length of the payload data.
*/ */
@ -118,4 +151,13 @@ import org.apache.lucene.analysis.TokenStream;
} }
System.arraycopy(this.data, this.offset, target, targetOffset, this.length); System.arraycopy(this.data, this.offset, target, targetOffset, this.length);
} }
/**
 * Clones this payload, giving the clone its own copy of the payload bytes.
 * <p>
 * The clone is created through <code>super.clone()</code>, so it has the
 * same runtime class as this object; a subclass of Payload therefore clones
 * to an instance of that subclass rather than to a plain Payload.
 * <p>
 * If this payload holds no byte array (for example, one created with the
 * no-arg constructor and never given data), the clone holds none either.
 *
 * @return a copy of this payload that does not share this payload's byte array
 */
public Object clone() {
  try {
    // Shallow copy first: this preserves the runtime class of the object.
    Payload copy = (Payload) super.clone();
    if (this.data != null) {
      // Replace the shared array reference with a private copy of just the
      // payload bytes; the copy therefore starts at offset 0.
      byte[] bytes = this.toByteArray();
      copy.data = bytes;
      copy.offset = 0;
      copy.length = bytes.length;
    }
    return copy;
  } catch (CloneNotSupportedException e) {
    // Payload implements Cloneable, so super.clone() is not expected to fail.
    throw new RuntimeException(e); // shouldn't happen
  }
}
} }

View File

@ -43,14 +43,8 @@ public interface TermPositions
* Returns the length of the payload at the current term position. * Returns the length of the payload at the current term position.
* This is invalid until {@link #nextPosition()} is called for * This is invalid until {@link #nextPosition()} is called for
* the first time.<br> * the first time.<br>
* <br>
* <p><font color="#FF0000">
* WARNING: The status of the <b>Payloads</b> feature is experimental.
* The APIs introduced here might change in the future and will not be
* supported anymore in such a case.</font>
* @return length of the current payload in number of bytes * @return length of the current payload in number of bytes
*/ */
// TODO: Remove warning after API has been finalized
int getPayloadLength(); int getPayloadLength();
/** /**
@ -61,11 +55,6 @@ public interface TermPositions
* of {@link #nextPosition()}. However, payloads are loaded lazily, * of {@link #nextPosition()}. However, payloads are loaded lazily,
* so if the payload data for the current position is not needed, * so if the payload data for the current position is not needed,
* this method may not be called at all for performance reasons.<br> * this method may not be called at all for performance reasons.<br>
* <br>
* <p><font color="#FF0000">
* WARNING: The status of the <b>Payloads</b> feature is experimental.
* The APIs introduced here might change in the future and will not be
* supported anymore in such a case.</font>
* *
* @param data the array into which the data of this payload is to be * @param data the array into which the data of this payload is to be
* stored, if it is big enough; otherwise, a new byte[] array * stored, if it is big enough; otherwise, a new byte[] array
@ -75,7 +64,6 @@ public interface TermPositions
* @return a byte[] array containing the data of this payload * @return a byte[] array containing the data of this payload
* @throws IOException * @throws IOException
*/ */
// TODO: Remove warning after API has been finalized
byte[] getPayload(byte[] data, int offset) throws IOException; byte[] getPayload(byte[] data, int offset) throws IOException;
/** /**
@ -84,14 +72,8 @@ public interface TermPositions
* Payloads can only be loaded once per call to * Payloads can only be loaded once per call to
* {@link #nextPosition()}. * {@link #nextPosition()}.
* *
* <p><font color="#FF0000">
* WARNING: The status of the <b>Payloads</b> feature is experimental.
* The APIs introduced here might change in the future and will not be
* supported anymore in such a case.</font>
*
* @return true if there is a payload available at this position that can be loaded * @return true if there is a payload available at this position that can be loaded
*/ */
// TODO: Remove warning after API has been finalized
public boolean isPayloadAvailable(); public boolean isPayloadAvailable();
} }

View File

@ -513,19 +513,14 @@ public abstract class Similarity implements Serializable {
* what is in the byte array. * what is in the byte array.
* <p> * <p>
* The default implementation returns 1. * The default implementation returns 1.
* <br>
* <p><font color="#FF0000">
* WARNING: The status of the <b>Payloads</b> feature is experimental.
* The APIs introduced here might change in the future and will not be
* supported anymore in such a case.</font>
* *
* @param fieldName The fieldName of the term this payload belongs to
* @param payload The payload byte array to be scored * @param payload The payload byte array to be scored
* @param offset The offset into the payload array * @param offset The offset into the payload array
* @param length The length in the array * @param length The length in the array
* @return An implementation dependent float to be used as a scoring factor * @return An implementation dependent float to be used as a scoring factor
*/ */
// TODO: Remove warning after API has been finalized public float scorePayload(String fieldName, byte [] payload, int offset, int length)
public float scorePayload(byte [] payload, int offset, int length)
{ {
//Do nothing //Do nothing
return 1; return 1;

View File

@ -37,11 +37,6 @@ import java.io.IOException;
* <p> * <p>
* Payload scores are averaged across term occurrences in the document. * Payload scores are averaged across term occurrences in the document.
* *
* <p><font color="#FF0000">
* WARNING: The status of the <b>Payloads</b> feature is experimental.
* The APIs introduced here might change in the future and will not be
* supported anymore in such a case.</font>
*
* @see org.apache.lucene.search.Similarity#scorePayload(byte[], int, int) * @see org.apache.lucene.search.Similarity#scorePayload(String, byte[], int, int)
*/ */
public class BoostingTermQuery extends SpanTermQuery{ public class BoostingTermQuery extends SpanTermQuery{
@ -136,7 +131,7 @@ public class BoostingTermQuery extends SpanTermQuery{
protected void processPayload(Similarity similarity) throws IOException { protected void processPayload(Similarity similarity) throws IOException {
if (positions.isPayloadAvailable()) { if (positions.isPayloadAvailable()) {
payload = positions.getPayload(payload, 0); payload = positions.getPayload(payload, 0);
payloadScore += similarity.scorePayload(payload, 0, positions.getPayloadLength()); payloadScore += similarity.scorePayload(term.field(), payload, 0, positions.getPayloadLength());
payloadsSeen++; payloadsSeen++;
} else { } else {

View File

@ -29,13 +29,6 @@
<li><a href="./BoostingTermQuery.html">BoostingTermQuery</a> -- Boost a term's score based on the value of the payload located at that term</li> <li><a href="./BoostingTermQuery.html">BoostingTermQuery</a> -- Boost a term's score based on the value of the payload located at that term</li>
</ol> </ol>
</div> </div>
<DIV>
<font color="#FF0000">
WARNING: The status of the <b>Payloads</b> feature is experimental. The APIs
introduced here might change in the future and will not be supported anymore
in such a case.
</font>
</DIV>
<DIV>&nbsp;</DIV> <DIV>&nbsp;</DIV>
<DIV align="center"> <DIV align="center">
</DIV> </DIV>

View File

@ -81,6 +81,13 @@ public class TestPayloads extends LuceneTestCase {
} catch (Exception expected) { } catch (Exception expected) {
// expected exception // expected exception
} }
Payload clone = (Payload) payload.clone();
assertEquals(payload.length(), clone.length());
for (int i = 0; i < payload.length(); i++) {
assertEquals(payload.byteAt(i), clone.byteAt(i));
}
} }
// Tests whether the DocumentWriter and SegmentMerger correctly enable the // Tests whether the DocumentWriter and SegmentMerger correctly enable the
@ -429,6 +436,7 @@ public class TestPayloads extends LuceneTestCase {
private byte[] data; private byte[] data;
private int length; private int length;
private int offset; private int offset;
Payload payload = new Payload();
public PayloadFilter(TokenStream in, byte[] data, int offset, int length) { public PayloadFilter(TokenStream in, byte[] data, int offset, int length) {
super(in); super(in);
@ -437,14 +445,23 @@ public class TestPayloads extends LuceneTestCase {
this.offset = offset; this.offset = offset;
} }
public Token next() throws IOException { public Token next(Token token) throws IOException {
Token nextToken = input.next(); token = input.next(token);
if (nextToken != null && offset + length <= data.length) { if (token != null) {
nextToken.setPayload(new Payload(data, offset, length)); if (offset + length <= data.length) {
offset += length; Payload p = null;
if (p == null) {
p = new Payload();
token.setPayload(p);
}
p.setData(data, offset, length);
offset += length;
} else {
token.setPayload(null);
}
} }
return nextToken; return token;
} }
} }

View File

@ -196,7 +196,7 @@ public class TestBoostingTermQuery extends LuceneTestCase {
static class BoostingSimilarity extends DefaultSimilarity { static class BoostingSimilarity extends DefaultSimilarity {
// TODO: Remove warning after API has been finalized // TODO: Remove warning after API has been finalized
public float scorePayload(byte[] payload, int offset, int length) { public float scorePayload(String fieldName, byte[] payload, int offset, int length) {
//we know it is size 4 here, so ignore the offset/length //we know it is size 4 here, so ignore the offset/length
return payload[0]; return payload[0];
} }