LUCENE-3396: Converted simple Analyzers which got lost in merging

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1169654 13f79535-47bb-0310-9956-ffa450edef68
Christopher John Male 2011-09-12 09:00:42 +00:00
parent f7adf92edf
commit 3597bc4bf4
3 changed files with 49 additions and 47 deletions
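The conversion applied in all three files follows one pattern: test analyzers that used to override Analyzer.tokenStream(String, Reader) now extend ReusableAnalyzerBase and override createComponents(String, Reader), returning a TokenStreamComponents that pairs the Tokenizer with the top of its filter chain so the chain can be reused across documents. A minimal before/after sketch of that conversion (the analyzer instances and SomeFilter are placeholders, not code from this commit):

    // Before: a fresh TokenStream is built on every call.
    Analyzer before = new Analyzer() {
      @Override
      public TokenStream tokenStream(String fieldName, Reader reader) {
        // SomeFilter stands in for any TokenFilter
        return new SomeFilter(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
      }
    };

    // After: describe the chain once; ReusableAnalyzerBase caches the
    // TokenStreamComponents and resets the Tokenizer for later readers.
    Analyzer after = new ReusableAnalyzerBase() {
      @Override
      public TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
        return new TokenStreamComponents(tokenizer, new SomeFilter(tokenizer));
      }
    };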

View File

@@ -120,6 +120,7 @@ public class TestPayloads extends LuceneTestCase {
     // now we add another document which has payloads for field f3 and verify if the SegmentMerger
     // enabled payloads for that field
+    analyzer = new PayloadAnalyzer(); // Clear payload state for each field
     writer = new IndexWriter(ram, newIndexWriterConfig( TEST_VERSION_CURRENT,
         analyzer).setOpenMode(OpenMode.CREATE));
     d = new Document();
@@ -188,9 +189,9 @@ public class TestPayloads extends LuceneTestCase {
     // occurrences within two consecutive skip intervals
     int offset = 0;
     for (int i = 0; i < 2 * numDocs; i++) {
-      analyzer.setPayloadData(fieldName, payloadData, offset, 1);
+      analyzer = new PayloadAnalyzer(fieldName, payloadData, offset, 1);
       offset += numTerms;
-      writer.addDocument(d);
+      writer.addDocument(d, analyzer);
     }
     // make sure we create more than one segment to test merging
@@ -198,9 +199,9 @@ public class TestPayloads extends LuceneTestCase {
     // now we make sure to have different payload lengths next at the next skip point
     for (int i = 0; i < numDocs; i++) {
-      analyzer.setPayloadData(fieldName, payloadData, offset, i);
+      analyzer = new PayloadAnalyzer(fieldName, payloadData, offset, i);
       offset += i * numTerms;
-      writer.addDocument(d);
+      writer.addDocument(d, analyzer);
     }
     writer.optimize();
@@ -404,39 +405,37 @@ public class TestPayloads extends LuceneTestCase {
   /**
    * This Analyzer uses an WhitespaceTokenizer and PayloadFilter.
    */
-  private static class PayloadAnalyzer extends Analyzer {
+  private static class PayloadAnalyzer extends ReusableAnalyzerBase {
     Map<String,PayloadData> fieldToData = new HashMap<String,PayloadData>();
+    public PayloadAnalyzer() {
+      super(new PerFieldReuseStrategy());
+    }
-    void setPayloadData(String field, byte[] data, int offset, int length) {
-      fieldToData.put(field, new PayloadData(0, data, offset, length));
+    public PayloadAnalyzer(String field, byte[] data, int offset, int length) {
+      super(new PerFieldReuseStrategy());
+      setPayloadData(field, data, offset, length);
     }
-    void setPayloadData(String field, int numFieldInstancesToSkip, byte[] data, int offset, int length) {
-      fieldToData.put(field, new PayloadData(numFieldInstancesToSkip, data, offset, length));
+    void setPayloadData(String field, byte[] data, int offset, int length) {
+      fieldToData.put(field, new PayloadData(data, offset, length));
     }
     @Override
-    public TokenStream tokenStream(String fieldName, Reader reader) {
+    public TokenStreamComponents createComponents(String fieldName, Reader reader) {
       PayloadData payload = fieldToData.get(fieldName);
-      TokenStream ts = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
-      if (payload != null) {
-        if (payload.numFieldInstancesToSkip == 0) {
-          ts = new PayloadFilter(ts, payload.data, payload.offset, payload.length);
-        } else {
-          payload.numFieldInstancesToSkip--;
-        }
-      }
-      return ts;
+      Tokenizer ts = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+      TokenStream tokenStream = (payload != null) ?
+          new PayloadFilter(ts, payload.data, payload.offset, payload.length) : ts;
+      return new TokenStreamComponents(ts, tokenStream);
    }
    private static class PayloadData {
      byte[] data;
      int offset;
      int length;
-     int numFieldInstancesToSkip;
-     PayloadData(int skip, byte[] data, int offset, int length) {
-       numFieldInstancesToSkip = skip;
+     PayloadData(byte[] data, int offset, int length) {
       this.data = data;
       this.offset = offset;
       this.length = length;
@@ -454,6 +453,7 @@ public class TestPayloads extends LuceneTestCase {
    private int offset;
    private int startOffset;
    PayloadAttribute payloadAtt;
+   CharTermAttribute termAttribute;
    public PayloadFilter(TokenStream in, byte[] data, int offset, int length) {
      super(in);
@@ -462,23 +462,27 @@ public class TestPayloads extends LuceneTestCase {
      this.offset = offset;
      this.startOffset = offset;
      payloadAtt = addAttribute(PayloadAttribute.class);
+     termAttribute = addAttribute(CharTermAttribute.class);
    }
    @Override
    public boolean incrementToken() throws IOException {
      boolean hasNext = input.incrementToken();
-     if (hasNext) {
-       if (offset + length <= data.length) {
-         Payload p = new Payload();
-         payloadAtt.setPayload(p);
-         p.setData(data, offset, length);
-         offset += length;
-       } else {
-         payloadAtt.setPayload(null);
-       }
+     if (!hasNext) {
+       return false;
+     }
-     return hasNext;
+     // Some values of the same field are to have payloads and others not
+     if (offset + length <= data.length && !termAttribute.toString().endsWith("NO PAYLOAD")) {
+       Payload p = new Payload();
+       payloadAtt.setPayload(p);
+       p.setData(data, offset, length);
+       offset += length;
+     } else {
+       payloadAtt.setPayload(null);
+     }
+     return true;
    }
    @Override
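As a side note, the payloads produced by the converted PayloadAnalyzer above can be inspected straight from its token stream. A minimal sketch, not part of the commit, assuming a hypothetical dumpPayloads helper, java.io.StringReader, and the attribute classes this test already imports:

    static void dumpPayloads(Analyzer analyzer, String field, String text) throws IOException {
      TokenStream ts = analyzer.reusableTokenStream(field, new StringReader(text));
      CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
      PayloadAttribute payloadAtt = ts.addAttribute(PayloadAttribute.class);
      ts.reset();
      while (ts.incrementToken()) {
        Payload payload = payloadAtt.getPayload(); // null once PayloadFilter runs out of data
        System.out.println(termAtt.toString() + " -> "
            + (payload == null ? "no payload" : payload.length() + " bytes"));
      }
      ts.end();
      ts.close();
    }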

View File

@@ -24,11 +24,7 @@ import java.util.Collection;
 import java.util.Collections;
 import java.util.List;
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.MockAnalyzer;
-import org.apache.lucene.analysis.MockTokenizer;
-import org.apache.lucene.analysis.TokenFilter;
-import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.*;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
 import org.apache.lucene.document.Document;
@@ -100,11 +96,12 @@ public class TestBasics extends LuceneTestCase {
     }
   }
-  static final Analyzer simplePayloadAnalyzer = new Analyzer() {
+  static final Analyzer simplePayloadAnalyzer = new ReusableAnalyzerBase() {
     @Override
-    public TokenStream tokenStream(String fieldName, Reader reader) {
-      return new SimplePayloadFilter(new MockTokenizer(reader, MockTokenizer.SIMPLE, true));
+    public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+      Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
+      return new TokenStreamComponents(tokenizer, new SimplePayloadFilter(tokenizer));
     }
   };

View File

@@ -213,12 +213,13 @@ public class TestWordDelimiterFilter extends BaseTokenStreamTestCase {
     final CharArraySet protWords = new CharArraySet(TEST_VERSION_CURRENT, new HashSet<String>(Arrays.asList("NUTCH")), false);
     /* analyzer that uses whitespace + wdf */
-    Analyzer a = new Analyzer() {
+    Analyzer a = new ReusableAnalyzerBase() {
       @Override
-      public TokenStream tokenStream(String field, Reader reader) {
-        return new WordDelimiterFilter(
-            new MockTokenizer(reader, MockTokenizer.WHITESPACE, false),
-            flags, protWords);
+      public TokenStreamComponents createComponents(String field, Reader reader) {
+        Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+        return new TokenStreamComponents(tokenizer, new WordDelimiterFilter(
+            tokenizer,
+            flags, protWords));
       }
     };
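Converted this way, the anonymous analyzer still plugs into the usual BaseTokenStreamTestCase helpers. An illustrative call, assuming flags contains GENERATE_WORD_PARTS and SPLIT_ON_CASE_CHANGE (the actual flags and expected tokens in this test vary):

    // "PowerShot" splits on the case change into two word parts,
    // while the protected word "NUTCH" passes through untouched.
    assertAnalyzesTo(a, "PowerShot", new String[] { "Power", "Shot" });
    assertAnalyzesTo(a, "NUTCH", new String[] { "NUTCH" });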