diff --git a/CHANGES.txt b/CHANGES.txt index 8c6093b3be2..6669a081960 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -62,6 +62,11 @@ API Changes 7. LUCENE-743: Add IndexReader.reopen() method that re-opens an existing IndexReader (see New features -> 9.) (Michael Busch) + + 8. LUCENE-1062: Add setData(byte[] data), + setData(byte[] data, int offset, int length), getData(), getOffset() + and clone() methods to o.a.l.index.Payload. Also add the field name + as arg to Similarity.scorePayload(). (Michael Busch) Bug fixes diff --git a/src/java/org/apache/lucene/analysis/Token.java b/src/java/org/apache/lucene/analysis/Token.java index 3bf67dfb3e1..9b6376ee87d 100644 --- a/src/java/org/apache/lucene/analysis/Token.java +++ b/src/java/org/apache/lucene/analysis/Token.java @@ -73,9 +73,6 @@ import org.apache.lucene.index.TermPositions; @see org.apache.lucene.index.Payload */ - -// TODO: Remove warning after API has been finalized - public class Token implements Cloneable { private static final String DEFAULT_TYPE = "word"; @@ -321,25 +318,15 @@ public class Token implements Cloneable { } /** - * Returns this Token's payload. - *

- * WARNING: The status of the Payloads feature is experimental. - * The APIs introduced here might change in the future and will not be - * supported anymore in such a case. - */ - // TODO: Remove warning after API has been finalized + * Returns this Token's payload. + */ public Payload getPayload() { return this.payload; } /** * Sets this Token's payload. - *

- * WARNING: The status of the Payloads feature is experimental. - * The APIs introduced here might change in the future and will not be - * supported anymore in such a case. */ - // TODO: Remove warning after API has been finalized public void setPayload(Payload payload) { this.payload = payload; } @@ -381,6 +368,9 @@ public class Token implements Cloneable { t.termBuffer = null; t.setTermBuffer(termBuffer, 0, termLength); } + if (payload != null) { + t.setPayload((Payload) payload.clone()); + } return t; } catch (CloneNotSupportedException e) { throw new RuntimeException(e); // shouldn't happen diff --git a/src/java/org/apache/lucene/analysis/TokenStream.java b/src/java/org/apache/lucene/analysis/TokenStream.java index def4e3e9b27..6f0dd90cb87 100644 --- a/src/java/org/apache/lucene/analysis/TokenStream.java +++ b/src/java/org/apache/lucene/analysis/TokenStream.java @@ -46,8 +46,9 @@ public abstract class TokenStream { if (result != null) { Payload p = result.getPayload(); - if (p != null) - result.setPayload(new Payload(p.toByteArray(), 0, p.length())); + if (p != null) { + result.setPayload((Payload) p.clone()); + } } return result; @@ -74,7 +75,7 @@ public abstract class TokenStream { * implement this method. Reset() is not needed for * the standard indexing process. However, if the Tokens * of a TokenStream are intended to be consumed more than - * once, it is neccessary to implement reset(). + * once, it is necessary to implement reset(). */ public void reset() throws IOException {} diff --git a/src/java/org/apache/lucene/index/Payload.java b/src/java/org/apache/lucene/index/Payload.java index 15f11364a92..f68ae1fd60e 100644 --- a/src/java/org/apache/lucene/index/Payload.java +++ b/src/java/org/apache/lucene/index/Payload.java @@ -32,15 +32,9 @@ import org.apache.lucene.analysis.TokenStream; *

* Use {@link TermPositions#getPayloadLength()} and {@link TermPositions#getPayload(byte[], int)} * to retrieve the payloads from the index.
- *
- * - *

- * WARNING: The status of the Payloads feature is experimental. - * The APIs introduced here might change in the future and will not be - * supported anymore in such a case. - */ - // TODO: Remove warning after API has been finalized - public class Payload implements Serializable { + * + */ + public class Payload implements Serializable, Cloneable { /** the byte array containing the payload data */ protected byte[] data; @@ -51,12 +45,14 @@ import org.apache.lucene.analysis.TokenStream; protected int length; /** Creates an empty payload and does not allocate a byte array. */ - protected Payload() { - // no-arg constructor since this class implements Serializable + public Payload() { + // nothing to do } /** * Creates a new payload with the the given array as data. + * A reference to the passed-in array is held, i. e. no + * copy is made. * * @param data the data of this payload */ @@ -66,6 +62,8 @@ import org.apache.lucene.analysis.TokenStream; /** * Creates a new payload with the the given array as data. + * A reference to the passed-in array is held, i. e. no + * copy is made. * * @param data the data of this payload * @param offset the offset in the data byte array @@ -80,6 +78,41 @@ import org.apache.lucene.analysis.TokenStream; this.length = length; } + /** + * Sets this payloads data. + * A reference to the passed-in array is held, i. e. no + * copy is made. + */ + public void setData(byte[] data) { + setData(data, 0, data.length); + } + + /** + * Sets this payloads data. + * A reference to the passed-in array is held, i. e. no + * copy is made. + */ + public void setData(byte[] data, int offset, int length) { + this.data = data; + this.offset = offset; + this.length = length; + } + + /** + * Returns a reference to the underlying byte array + * that holds this payloads data. + */ + public byte[] getData() { + return this.data; + } + + /** + * Returns the offset in the underlying byte array + */ + public int getOffset() { + return this.offset; + } + /** * Returns the length of the payload data. */ @@ -118,4 +151,13 @@ import org.apache.lucene.analysis.TokenStream; } System.arraycopy(this.data, this.offset, target, targetOffset, this.length); } + + /** + * Clones this payload by creating a copy of the underlying + * byte array. + */ + public Object clone() { + Payload clone = new Payload(this.toByteArray()); + return clone; + } } diff --git a/src/java/org/apache/lucene/index/TermPositions.java b/src/java/org/apache/lucene/index/TermPositions.java index 5c5cec59a0e..a22f9f08021 100644 --- a/src/java/org/apache/lucene/index/TermPositions.java +++ b/src/java/org/apache/lucene/index/TermPositions.java @@ -43,14 +43,8 @@ public interface TermPositions * Returns the length of the payload at the current term position. * This is invalid until {@link #nextPosition()} is called for * the first time.
- *
- *

- * WARNING: The status of the Payloads feature is experimental. - * The APIs introduced here might change in the future and will not be - * supported anymore in such a case. * @return length of the current payload in number of bytes */ - // TODO: Remove warning after API has been finalized int getPayloadLength(); /** @@ -61,11 +55,6 @@ public interface TermPositions * of {@link #nextPosition()}. However, payloads are loaded lazily, * so if the payload data for the current position is not needed, * this method may not be called at all for performance reasons.
- *
- *

- * WARNING: The status of the Payloads feature is experimental. - * The APIs introduced here might change in the future and will not be - * supported anymore in such a case. * * @param data the array into which the data of this payload is to be * stored, if it is big enough; otherwise, a new byte[] array @@ -75,7 +64,6 @@ public interface TermPositions * @return a byte[] array containing the data of this payload * @throws IOException */ - // TODO: Remove warning after API has been finalized byte[] getPayload(byte[] data, int offset) throws IOException; /** @@ -84,14 +72,8 @@ public interface TermPositions * Payloads can only be loaded once per call to * {@link #nextPosition()}. * - *

- * WARNING: The status of the Payloads feature is experimental. - * The APIs introduced here might change in the future and will not be - * supported anymore in such a case. - * * @return true if there is a payload available at this position that can be loaded */ - // TODO: Remove warning after API has been finalized public boolean isPayloadAvailable(); } diff --git a/src/java/org/apache/lucene/search/Similarity.java b/src/java/org/apache/lucene/search/Similarity.java index 30df6385d0d..d4d058804a4 100644 --- a/src/java/org/apache/lucene/search/Similarity.java +++ b/src/java/org/apache/lucene/search/Similarity.java @@ -513,19 +513,14 @@ public abstract class Similarity implements Serializable { * what is in the byte array. *

* The default implementation returns 1. - *
- *

- * WARNING: The status of the Payloads feature is experimental. - * The APIs introduced here might change in the future and will not be - * supported anymore in such a case. * + * @param fieldName The fieldName of the term this payload belongs to * @param payload The payload byte array to be scored * @param offset The offset into the payload array * @param length The length in the array * @return An implementation dependent float to be used as a scoring factor */ - // TODO: Remove warning after API has been finalized - public float scorePayload(byte [] payload, int offset, int length) + public float scorePayload(String fieldName, byte [] payload, int offset, int length) { //Do nothing return 1; diff --git a/src/java/org/apache/lucene/search/payloads/BoostingTermQuery.java b/src/java/org/apache/lucene/search/payloads/BoostingTermQuery.java index f0a5cad2694..44951968a35 100644 --- a/src/java/org/apache/lucene/search/payloads/BoostingTermQuery.java +++ b/src/java/org/apache/lucene/search/payloads/BoostingTermQuery.java @@ -37,11 +37,6 @@ import java.io.IOException; *

* Payload scores are averaged across term occurrences in the document. * - *

- * WARNING: The status of the Payloads feature is experimental. - * The APIs introduced here might change in the future and will not be - * supported anymore in such a case. - * * @see org.apache.lucene.search.Similarity#scorePayload(byte[], int, int) */ public class BoostingTermQuery extends SpanTermQuery{ @@ -136,7 +131,7 @@ public class BoostingTermQuery extends SpanTermQuery{ protected void processPayload(Similarity similarity) throws IOException { if (positions.isPayloadAvailable()) { payload = positions.getPayload(payload, 0); - payloadScore += similarity.scorePayload(payload, 0, positions.getPayloadLength()); + payloadScore += similarity.scorePayload(term.field(), payload, 0, positions.getPayloadLength()); payloadsSeen++; } else { diff --git a/src/java/org/apache/lucene/search/payloads/package.html b/src/java/org/apache/lucene/search/payloads/package.html index 37100f1e786..995d9161235 100644 --- a/src/java/org/apache/lucene/search/payloads/package.html +++ b/src/java/org/apache/lucene/search/payloads/package.html @@ -29,13 +29,6 @@

  • BoostingTermQuery -- Boost a term's score based on the value of the payload located at that term
  • -
    - -WARNING: The status of the Payloads feature is experimental. The APIs -introduced here might change in the future and will not be supported anymore -in such a case. - -
     
    diff --git a/src/test/org/apache/lucene/index/TestPayloads.java b/src/test/org/apache/lucene/index/TestPayloads.java index d8cf469688b..aa3bace21fd 100644 --- a/src/test/org/apache/lucene/index/TestPayloads.java +++ b/src/test/org/apache/lucene/index/TestPayloads.java @@ -81,6 +81,13 @@ public class TestPayloads extends LuceneTestCase { } catch (Exception expected) { // expected exception } + + Payload clone = (Payload) payload.clone(); + assertEquals(payload.length(), clone.length()); + for (int i = 0; i < payload.length(); i++) { + assertEquals(payload.byteAt(i), clone.byteAt(i)); + } + } // Tests whether the DocumentWriter and SegmentMerger correctly enable the @@ -429,6 +436,7 @@ public class TestPayloads extends LuceneTestCase { private byte[] data; private int length; private int offset; + Payload payload = new Payload(); public PayloadFilter(TokenStream in, byte[] data, int offset, int length) { super(in); @@ -437,14 +445,23 @@ public class TestPayloads extends LuceneTestCase { this.offset = offset; } - public Token next() throws IOException { - Token nextToken = input.next(); - if (nextToken != null && offset + length <= data.length) { - nextToken.setPayload(new Payload(data, offset, length)); - offset += length; - } + public Token next(Token token) throws IOException { + token = input.next(token); + if (token != null) { + if (offset + length <= data.length) { + Payload p = null; + if (p == null) { + p = new Payload(); + token.setPayload(p); + } + p.setData(data, offset, length); + offset += length; + } else { + token.setPayload(null); + } + } - return nextToken; + return token; } } diff --git a/src/test/org/apache/lucene/search/payloads/TestBoostingTermQuery.java b/src/test/org/apache/lucene/search/payloads/TestBoostingTermQuery.java index 403c3e364e8..47dafecbe6e 100644 --- a/src/test/org/apache/lucene/search/payloads/TestBoostingTermQuery.java +++ b/src/test/org/apache/lucene/search/payloads/TestBoostingTermQuery.java @@ -196,7 +196,7 @@ public class TestBoostingTermQuery extends LuceneTestCase { static class BoostingSimilarity extends DefaultSimilarity { // TODO: Remove warning after API has been finalized - public float scorePayload(byte[] payload, int offset, int length) { + public float scorePayload(String fieldName, byte[] payload, int offset, int length) { //we know it is size 4 here, so ignore the offset/length return payload[0]; }