mirror of
https://github.com/apache/lucene.git
synced 2025-03-08 01:25:19 +00:00
LUCENE-1062: Add setData(byte[] data), setData(byte[] data, int offset, int length), getData(), getOffset() and clone() methods to o.a.l.index.Payload. Also add the field name as arg to Similarity.scorePayload().
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@597741 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
f7ee9f6dfb
commit
54e5550c4d
@ -63,6 +63,11 @@ API Changes
|
|||||||
7. LUCENE-743: Add IndexReader.reopen() method that re-opens an
|
7. LUCENE-743: Add IndexReader.reopen() method that re-opens an
|
||||||
existing IndexReader (see New features -> 9.) (Michael Busch)
|
existing IndexReader (see New features -> 9.) (Michael Busch)
|
||||||
|
|
||||||
|
8. LUCENE-1062: Add setData(byte[] data),
|
||||||
|
setData(byte[] data, int offset, int length), getData(), getOffset()
|
||||||
|
and clone() methods to o.a.l.index.Payload. Also add the field name
|
||||||
|
as arg to Similarity.scorePayload(). (Michael Busch)
|
||||||
|
|
||||||
Bug fixes
|
Bug fixes
|
||||||
|
|
||||||
1. LUCENE-933: QueryParser fixed to not produce empty sub
|
1. LUCENE-933: QueryParser fixed to not produce empty sub
|
||||||
|
@ -73,9 +73,6 @@ import org.apache.lucene.index.TermPositions;
|
|||||||
|
|
||||||
@see org.apache.lucene.index.Payload
|
@see org.apache.lucene.index.Payload
|
||||||
*/
|
*/
|
||||||
|
|
||||||
// TODO: Remove warning after API has been finalized
|
|
||||||
|
|
||||||
public class Token implements Cloneable {
|
public class Token implements Cloneable {
|
||||||
|
|
||||||
private static final String DEFAULT_TYPE = "word";
|
private static final String DEFAULT_TYPE = "word";
|
||||||
@ -322,24 +319,14 @@ public class Token implements Cloneable {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns this Token's payload.
|
* Returns this Token's payload.
|
||||||
* <p><font color="#FF0000">
|
|
||||||
* WARNING: The status of the <b>Payloads</b> feature is experimental.
|
|
||||||
* The APIs introduced here might change in the future and will not be
|
|
||||||
* supported anymore in such a case.</font>
|
|
||||||
*/
|
*/
|
||||||
// TODO: Remove warning after API has been finalized
|
|
||||||
public Payload getPayload() {
|
public Payload getPayload() {
|
||||||
return this.payload;
|
return this.payload;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Sets this Token's payload.
|
* Sets this Token's payload.
|
||||||
* <p><font color="#FF0000">
|
|
||||||
* WARNING: The status of the <b>Payloads</b> feature is experimental.
|
|
||||||
* The APIs introduced here might change in the future and will not be
|
|
||||||
* supported anymore in such a case.</font>
|
|
||||||
*/
|
*/
|
||||||
// TODO: Remove warning after API has been finalized
|
|
||||||
public void setPayload(Payload payload) {
|
public void setPayload(Payload payload) {
|
||||||
this.payload = payload;
|
this.payload = payload;
|
||||||
}
|
}
|
||||||
@ -381,6 +368,9 @@ public class Token implements Cloneable {
|
|||||||
t.termBuffer = null;
|
t.termBuffer = null;
|
||||||
t.setTermBuffer(termBuffer, 0, termLength);
|
t.setTermBuffer(termBuffer, 0, termLength);
|
||||||
}
|
}
|
||||||
|
if (payload != null) {
|
||||||
|
t.setPayload((Payload) payload.clone());
|
||||||
|
}
|
||||||
return t;
|
return t;
|
||||||
} catch (CloneNotSupportedException e) {
|
} catch (CloneNotSupportedException e) {
|
||||||
throw new RuntimeException(e); // shouldn't happen
|
throw new RuntimeException(e); // shouldn't happen
|
||||||
|
@ -46,8 +46,9 @@ public abstract class TokenStream {
|
|||||||
|
|
||||||
if (result != null) {
|
if (result != null) {
|
||||||
Payload p = result.getPayload();
|
Payload p = result.getPayload();
|
||||||
if (p != null)
|
if (p != null) {
|
||||||
result.setPayload(new Payload(p.toByteArray(), 0, p.length()));
|
result.setPayload((Payload) p.clone());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
@ -74,7 +75,7 @@ public abstract class TokenStream {
|
|||||||
* implement this method. Reset() is not needed for
|
* implement this method. Reset() is not needed for
|
||||||
* the standard indexing process. However, if the Tokens
|
* the standard indexing process. However, if the Tokens
|
||||||
* of a TokenStream are intended to be consumed more than
|
* of a TokenStream are intended to be consumed more than
|
||||||
* once, it is neccessary to implement reset().
|
* once, it is necessary to implement reset().
|
||||||
*/
|
*/
|
||||||
public void reset() throws IOException {}
|
public void reset() throws IOException {}
|
||||||
|
|
||||||
|
@ -32,15 +32,9 @@ import org.apache.lucene.analysis.TokenStream;
|
|||||||
* <p>
|
* <p>
|
||||||
* Use {@link TermPositions#getPayloadLength()} and {@link TermPositions#getPayload(byte[], int)}
|
* Use {@link TermPositions#getPayloadLength()} and {@link TermPositions#getPayload(byte[], int)}
|
||||||
* to retrieve the payloads from the index.<br>
|
* to retrieve the payloads from the index.<br>
|
||||||
* <br>
|
|
||||||
*
|
*
|
||||||
* <p><font color="#FF0000">
|
|
||||||
* WARNING: The status of the <b>Payloads</b> feature is experimental.
|
|
||||||
* The APIs introduced here might change in the future and will not be
|
|
||||||
* supported anymore in such a case.</font>
|
|
||||||
*/
|
*/
|
||||||
// TODO: Remove warning after API has been finalized
|
public class Payload implements Serializable, Cloneable {
|
||||||
public class Payload implements Serializable {
|
|
||||||
/** the byte array containing the payload data */
|
/** the byte array containing the payload data */
|
||||||
protected byte[] data;
|
protected byte[] data;
|
||||||
|
|
||||||
@ -51,12 +45,14 @@ import org.apache.lucene.analysis.TokenStream;
|
|||||||
protected int length;
|
protected int length;
|
||||||
|
|
||||||
/** Creates an empty payload and does not allocate a byte array. */
|
/** Creates an empty payload and does not allocate a byte array. */
|
||||||
protected Payload() {
|
public Payload() {
|
||||||
// no-arg constructor since this class implements Serializable
|
// nothing to do
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates a new payload with the the given array as data.
|
* Creates a new payload with the the given array as data.
|
||||||
|
* A reference to the passed-in array is held, i. e. no
|
||||||
|
* copy is made.
|
||||||
*
|
*
|
||||||
* @param data the data of this payload
|
* @param data the data of this payload
|
||||||
*/
|
*/
|
||||||
@ -66,6 +62,8 @@ import org.apache.lucene.analysis.TokenStream;
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates a new payload with the the given array as data.
|
* Creates a new payload with the the given array as data.
|
||||||
|
* A reference to the passed-in array is held, i. e. no
|
||||||
|
* copy is made.
|
||||||
*
|
*
|
||||||
* @param data the data of this payload
|
* @param data the data of this payload
|
||||||
* @param offset the offset in the data byte array
|
* @param offset the offset in the data byte array
|
||||||
@ -80,6 +78,41 @@ import org.apache.lucene.analysis.TokenStream;
|
|||||||
this.length = length;
|
this.length = length;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sets this payloads data.
|
||||||
|
* A reference to the passed-in array is held, i. e. no
|
||||||
|
* copy is made.
|
||||||
|
*/
|
||||||
|
public void setData(byte[] data) {
|
||||||
|
setData(data, 0, data.length);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sets this payloads data.
|
||||||
|
* A reference to the passed-in array is held, i. e. no
|
||||||
|
* copy is made.
|
||||||
|
*/
|
||||||
|
public void setData(byte[] data, int offset, int length) {
|
||||||
|
this.data = data;
|
||||||
|
this.offset = offset;
|
||||||
|
this.length = length;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns a reference to the underlying byte array
|
||||||
|
* that holds this payloads data.
|
||||||
|
*/
|
||||||
|
public byte[] getData() {
|
||||||
|
return this.data;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the offset in the underlying byte array
|
||||||
|
*/
|
||||||
|
public int getOffset() {
|
||||||
|
return this.offset;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the length of the payload data.
|
* Returns the length of the payload data.
|
||||||
*/
|
*/
|
||||||
@ -118,4 +151,13 @@ import org.apache.lucene.analysis.TokenStream;
|
|||||||
}
|
}
|
||||||
System.arraycopy(this.data, this.offset, target, targetOffset, this.length);
|
System.arraycopy(this.data, this.offset, target, targetOffset, this.length);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Clones this payload by creating a copy of the underlying
|
||||||
|
* byte array.
|
||||||
|
*/
|
||||||
|
public Object clone() {
|
||||||
|
Payload clone = new Payload(this.toByteArray());
|
||||||
|
return clone;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -43,14 +43,8 @@ public interface TermPositions
|
|||||||
* Returns the length of the payload at the current term position.
|
* Returns the length of the payload at the current term position.
|
||||||
* This is invalid until {@link #nextPosition()} is called for
|
* This is invalid until {@link #nextPosition()} is called for
|
||||||
* the first time.<br>
|
* the first time.<br>
|
||||||
* <br>
|
|
||||||
* <p><font color="#FF0000">
|
|
||||||
* WARNING: The status of the <b>Payloads</b> feature is experimental.
|
|
||||||
* The APIs introduced here might change in the future and will not be
|
|
||||||
* supported anymore in such a case.</font>
|
|
||||||
* @return length of the current payload in number of bytes
|
* @return length of the current payload in number of bytes
|
||||||
*/
|
*/
|
||||||
// TODO: Remove warning after API has been finalized
|
|
||||||
int getPayloadLength();
|
int getPayloadLength();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -61,11 +55,6 @@ public interface TermPositions
|
|||||||
* of {@link #nextPosition()}. However, payloads are loaded lazily,
|
* of {@link #nextPosition()}. However, payloads are loaded lazily,
|
||||||
* so if the payload data for the current position is not needed,
|
* so if the payload data for the current position is not needed,
|
||||||
* this method may not be called at all for performance reasons.<br>
|
* this method may not be called at all for performance reasons.<br>
|
||||||
* <br>
|
|
||||||
* <p><font color="#FF0000">
|
|
||||||
* WARNING: The status of the <b>Payloads</b> feature is experimental.
|
|
||||||
* The APIs introduced here might change in the future and will not be
|
|
||||||
* supported anymore in such a case.</font>
|
|
||||||
*
|
*
|
||||||
* @param data the array into which the data of this payload is to be
|
* @param data the array into which the data of this payload is to be
|
||||||
* stored, if it is big enough; otherwise, a new byte[] array
|
* stored, if it is big enough; otherwise, a new byte[] array
|
||||||
@ -75,7 +64,6 @@ public interface TermPositions
|
|||||||
* @return a byte[] array containing the data of this payload
|
* @return a byte[] array containing the data of this payload
|
||||||
* @throws IOException
|
* @throws IOException
|
||||||
*/
|
*/
|
||||||
// TODO: Remove warning after API has been finalized
|
|
||||||
byte[] getPayload(byte[] data, int offset) throws IOException;
|
byte[] getPayload(byte[] data, int offset) throws IOException;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -84,14 +72,8 @@ public interface TermPositions
|
|||||||
* Payloads can only be loaded once per call to
|
* Payloads can only be loaded once per call to
|
||||||
* {@link #nextPosition()}.
|
* {@link #nextPosition()}.
|
||||||
*
|
*
|
||||||
* <p><font color="#FF0000">
|
|
||||||
* WARNING: The status of the <b>Payloads</b> feature is experimental.
|
|
||||||
* The APIs introduced here might change in the future and will not be
|
|
||||||
* supported anymore in such a case.</font>
|
|
||||||
*
|
|
||||||
* @return true if there is a payload available at this position that can be loaded
|
* @return true if there is a payload available at this position that can be loaded
|
||||||
*/
|
*/
|
||||||
// TODO: Remove warning after API has been finalized
|
|
||||||
public boolean isPayloadAvailable();
|
public boolean isPayloadAvailable();
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -513,19 +513,14 @@ public abstract class Similarity implements Serializable {
|
|||||||
* what is in the byte array.
|
* what is in the byte array.
|
||||||
* <p>
|
* <p>
|
||||||
* The default implementation returns 1.
|
* The default implementation returns 1.
|
||||||
* <br>
|
|
||||||
* <p><font color="#FF0000">
|
|
||||||
* WARNING: The status of the <b>Payloads</b> feature is experimental.
|
|
||||||
* The APIs introduced here might change in the future and will not be
|
|
||||||
* supported anymore in such a case.</font>
|
|
||||||
*
|
*
|
||||||
|
* @param fieldName The fieldName of the term this payload belongs to
|
||||||
* @param payload The payload byte array to be scored
|
* @param payload The payload byte array to be scored
|
||||||
* @param offset The offset into the payload array
|
* @param offset The offset into the payload array
|
||||||
* @param length The length in the array
|
* @param length The length in the array
|
||||||
* @return An implementation dependent float to be used as a scoring factor
|
* @return An implementation dependent float to be used as a scoring factor
|
||||||
*/
|
*/
|
||||||
// TODO: Remove warning after API has been finalized
|
public float scorePayload(String fieldName, byte [] payload, int offset, int length)
|
||||||
public float scorePayload(byte [] payload, int offset, int length)
|
|
||||||
{
|
{
|
||||||
//Do nothing
|
//Do nothing
|
||||||
return 1;
|
return 1;
|
||||||
|
@ -37,11 +37,6 @@ import java.io.IOException;
|
|||||||
* <p>
|
* <p>
|
||||||
* Payload scores are averaged across term occurrences in the document.
|
* Payload scores are averaged across term occurrences in the document.
|
||||||
*
|
*
|
||||||
* <p><font color="#FF0000">
|
|
||||||
* WARNING: The status of the <b>Payloads</b> feature is experimental.
|
|
||||||
* The APIs introduced here might change in the future and will not be
|
|
||||||
* supported anymore in such a case.</font>
|
|
||||||
*
|
|
||||||
* @see org.apache.lucene.search.Similarity#scorePayload(byte[], int, int)
|
* @see org.apache.lucene.search.Similarity#scorePayload(byte[], int, int)
|
||||||
*/
|
*/
|
||||||
public class BoostingTermQuery extends SpanTermQuery{
|
public class BoostingTermQuery extends SpanTermQuery{
|
||||||
@ -136,7 +131,7 @@ public class BoostingTermQuery extends SpanTermQuery{
|
|||||||
protected void processPayload(Similarity similarity) throws IOException {
|
protected void processPayload(Similarity similarity) throws IOException {
|
||||||
if (positions.isPayloadAvailable()) {
|
if (positions.isPayloadAvailable()) {
|
||||||
payload = positions.getPayload(payload, 0);
|
payload = positions.getPayload(payload, 0);
|
||||||
payloadScore += similarity.scorePayload(payload, 0, positions.getPayloadLength());
|
payloadScore += similarity.scorePayload(term.field(), payload, 0, positions.getPayloadLength());
|
||||||
payloadsSeen++;
|
payloadsSeen++;
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
|
@ -29,13 +29,6 @@
|
|||||||
<li><a href="./BoostingTermQuery.html">BoostingTermQuery</a> -- Boost a term's score based on the value of the payload located at that term</li>
|
<li><a href="./BoostingTermQuery.html">BoostingTermQuery</a> -- Boost a term's score based on the value of the payload located at that term</li>
|
||||||
</ol>
|
</ol>
|
||||||
</div>
|
</div>
|
||||||
<DIV>
|
|
||||||
<font color="#FF0000">
|
|
||||||
WARNING: The status of the <b>Payloads</b> feature is experimental. The APIs
|
|
||||||
introduced here might change in the future and will not be supported anymore
|
|
||||||
in such a case.
|
|
||||||
</font>
|
|
||||||
</DIV>
|
|
||||||
<DIV> </DIV>
|
<DIV> </DIV>
|
||||||
<DIV align="center">
|
<DIV align="center">
|
||||||
</DIV>
|
</DIV>
|
||||||
|
@ -81,6 +81,13 @@ public class TestPayloads extends LuceneTestCase {
|
|||||||
} catch (Exception expected) {
|
} catch (Exception expected) {
|
||||||
// expected exception
|
// expected exception
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Payload clone = (Payload) payload.clone();
|
||||||
|
assertEquals(payload.length(), clone.length());
|
||||||
|
for (int i = 0; i < payload.length(); i++) {
|
||||||
|
assertEquals(payload.byteAt(i), clone.byteAt(i));
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Tests whether the DocumentWriter and SegmentMerger correctly enable the
|
// Tests whether the DocumentWriter and SegmentMerger correctly enable the
|
||||||
@ -429,6 +436,7 @@ public class TestPayloads extends LuceneTestCase {
|
|||||||
private byte[] data;
|
private byte[] data;
|
||||||
private int length;
|
private int length;
|
||||||
private int offset;
|
private int offset;
|
||||||
|
Payload payload = new Payload();
|
||||||
|
|
||||||
public PayloadFilter(TokenStream in, byte[] data, int offset, int length) {
|
public PayloadFilter(TokenStream in, byte[] data, int offset, int length) {
|
||||||
super(in);
|
super(in);
|
||||||
@ -437,14 +445,23 @@ public class TestPayloads extends LuceneTestCase {
|
|||||||
this.offset = offset;
|
this.offset = offset;
|
||||||
}
|
}
|
||||||
|
|
||||||
public Token next() throws IOException {
|
public Token next(Token token) throws IOException {
|
||||||
Token nextToken = input.next();
|
token = input.next(token);
|
||||||
if (nextToken != null && offset + length <= data.length) {
|
if (token != null) {
|
||||||
nextToken.setPayload(new Payload(data, offset, length));
|
if (offset + length <= data.length) {
|
||||||
offset += length;
|
Payload p = null;
|
||||||
|
if (p == null) {
|
||||||
|
p = new Payload();
|
||||||
|
token.setPayload(p);
|
||||||
|
}
|
||||||
|
p.setData(data, offset, length);
|
||||||
|
offset += length;
|
||||||
|
} else {
|
||||||
|
token.setPayload(null);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return nextToken;
|
return token;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -196,7 +196,7 @@ public class TestBoostingTermQuery extends LuceneTestCase {
|
|||||||
static class BoostingSimilarity extends DefaultSimilarity {
|
static class BoostingSimilarity extends DefaultSimilarity {
|
||||||
|
|
||||||
// TODO: Remove warning after API has been finalized
|
// TODO: Remove warning after API has been finalized
|
||||||
public float scorePayload(byte[] payload, int offset, int length) {
|
public float scorePayload(String fieldName, byte[] payload, int offset, int length) {
|
||||||
//we know it is size 4 here, so ignore the offset/length
|
//we know it is size 4 here, so ignore the offset/length
|
||||||
return payload[0];
|
return payload[0];
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user