LUCENE-3396: Converted simple Analyzers which got lost in merging

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1169654 13f79535-47bb-0310-9956-ffa450edef68
Christopher John Male 2011-09-12 09:00:42 +00:00
parent f7adf92edf
commit 3597bc4bf4
3 changed files with 49 additions and 47 deletions
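
The pattern is the same in all three files: a test Analyzer that overrode tokenStream(String, Reader) and rebuilt its chain on every call now extends ReusableAnalyzerBase and describes the chain once in createComponents(String, Reader), returning a TokenStreamComponents pair that can be reused. A minimal sketch of the converted shape, assuming the trunk API at this revision; the class name and the identity chain are illustrative, not part of the commit:

import java.io.Reader;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.ReusableAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;

// Sketch only: each converted analyzer in this commit follows this shape,
// wrapping its own filter around the tokenizer instead of the identity chain shown here.
final class ExampleConvertedAnalyzer extends ReusableAnalyzerBase {
  @Override
  public TokenStreamComponents createComponents(String fieldName, Reader reader) {
    // The Tokenizer is the reusable source; the second argument is the end of the filter chain.
    Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
    TokenStream sink = source; // a real analyzer would wrap TokenFilters around 'source' here
    return new TokenStreamComponents(source, sink);
  }
}

The first constructor argument is the Tokenizer that gets reset with a new Reader when the components are reused; the second is whatever the last filter in the chain is.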

TestPayloads.java

@@ -120,6 +120,7 @@ public class TestPayloads extends LuceneTestCase {
     // now we add another document which has payloads for field f3 and verify if the SegmentMerger
     // enabled payloads for that field
+    analyzer = new PayloadAnalyzer(); // Clear payload state for each field
     writer = new IndexWriter(ram, newIndexWriterConfig( TEST_VERSION_CURRENT,
         analyzer).setOpenMode(OpenMode.CREATE));
     d = new Document();
@@ -188,9 +189,9 @@ public class TestPayloads extends LuceneTestCase {
     // occurrences within two consecutive skip intervals
     int offset = 0;
     for (int i = 0; i < 2 * numDocs; i++) {
-      analyzer.setPayloadData(fieldName, payloadData, offset, 1);
+      analyzer = new PayloadAnalyzer(fieldName, payloadData, offset, 1);
       offset += numTerms;
-      writer.addDocument(d);
+      writer.addDocument(d, analyzer);
     }
     // make sure we create more than one segment to test merging
@@ -198,9 +199,9 @@ public class TestPayloads extends LuceneTestCase {
     // now we make sure to have different payload lengths next at the next skip point
     for (int i = 0; i < numDocs; i++) {
-      analyzer.setPayloadData(fieldName, payloadData, offset, i);
+      analyzer = new PayloadAnalyzer(fieldName, payloadData, offset, i);
       offset += i * numTerms;
-      writer.addDocument(d);
+      writer.addDocument(d, analyzer);
     }
     writer.optimize();
@@ -404,39 +405,37 @@ public class TestPayloads extends LuceneTestCase {
   /**
    * This Analyzer uses an WhitespaceTokenizer and PayloadFilter.
    */
-  private static class PayloadAnalyzer extends Analyzer {
+  private static class PayloadAnalyzer extends ReusableAnalyzerBase {
     Map<String,PayloadData> fieldToData = new HashMap<String,PayloadData>();
-    void setPayloadData(String field, byte[] data, int offset, int length) {
-      fieldToData.put(field, new PayloadData(0, data, offset, length));
+    public PayloadAnalyzer() {
+      super(new PerFieldReuseStrategy());
     }
-    void setPayloadData(String field, int numFieldInstancesToSkip, byte[] data, int offset, int length) {
-      fieldToData.put(field, new PayloadData(numFieldInstancesToSkip, data, offset, length));
+    public PayloadAnalyzer(String field, byte[] data, int offset, int length) {
+      super(new PerFieldReuseStrategy());
+      setPayloadData(field, data, offset, length);
+    }
+    void setPayloadData(String field, byte[] data, int offset, int length) {
+      fieldToData.put(field, new PayloadData(data, offset, length));
     }
     @Override
-    public TokenStream tokenStream(String fieldName, Reader reader) {
+    public TokenStreamComponents createComponents(String fieldName, Reader reader) {
       PayloadData payload = fieldToData.get(fieldName);
-      TokenStream ts = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
-      if (payload != null) {
-        if (payload.numFieldInstancesToSkip == 0) {
-          ts = new PayloadFilter(ts, payload.data, payload.offset, payload.length);
-        } else {
-          payload.numFieldInstancesToSkip--;
-        }
-      }
-      return ts;
+      Tokenizer ts = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+      TokenStream tokenStream = (payload != null) ?
          new PayloadFilter(ts, payload.data, payload.offset, payload.length) : ts;
+      return new TokenStreamComponents(ts, tokenStream);
    }
    private static class PayloadData {
      byte[] data;
      int offset;
      int length;
-      int numFieldInstancesToSkip;
-      PayloadData(int skip, byte[] data, int offset, int length) {
-        numFieldInstancesToSkip = skip;
+      PayloadData(byte[] data, int offset, int length) {
        this.data = data;
        this.offset = offset;
        this.length = length;
@@ -454,6 +453,7 @@ public class TestPayloads extends LuceneTestCase {
    private int offset;
    private int startOffset;
    PayloadAttribute payloadAtt;
+    CharTermAttribute termAttribute;
    public PayloadFilter(TokenStream in, byte[] data, int offset, int length) {
      super(in);
@@ -462,23 +462,27 @@ public class TestPayloads extends LuceneTestCase {
      this.offset = offset;
      this.startOffset = offset;
      payloadAtt = addAttribute(PayloadAttribute.class);
+      termAttribute = addAttribute(CharTermAttribute.class);
    }
    @Override
    public boolean incrementToken() throws IOException {
      boolean hasNext = input.incrementToken();
-      if (hasNext) {
-        if (offset + length <= data.length) {
-          Payload p = new Payload();
-          payloadAtt.setPayload(p);
-          p.setData(data, offset, length);
-          offset += length;
-        } else {
-          payloadAtt.setPayload(null);
-        }
-      }
-      return hasNext;
+      if (!hasNext) {
+        return false;
+      }
+      // Some values of the same field are to have payloads and others not
+      if (offset + length <= data.length && !termAttribute.toString().endsWith("NO PAYLOAD")) {
+        Payload p = new Payload();
+        payloadAtt.setPayload(p);
+        p.setData(data, offset, length);
+        offset += length;
+      } else {
+        payloadAtt.setPayload(null);
+      }
+      return true;
    }
    @Override
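
ReusableAnalyzerBase caches the TokenStreamComponents it creates, which explains two details above: PayloadAnalyzer passes new PerFieldReuseStrategy() to super() so each field name keeps its own cached chain, and the skip-interval tests build a fresh PayloadAnalyzer per document and pass it to addDocument(d, analyzer), since a cached PayloadFilter holds the payload data it was constructed with. A small sketch of that wiring, assuming the ReusableAnalyzerBase API at this revision (the class name is illustrative):

import java.io.Reader;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.ReusableAnalyzerBase;
import org.apache.lucene.analysis.Tokenizer;

// Illustrative only: the same reuse-strategy wiring the converted PayloadAnalyzer uses.
final class PerFieldCachedAnalyzer extends ReusableAnalyzerBase {
  PerFieldCachedAnalyzer() {
    // PerFieldReuseStrategy keeps one cached TokenStreamComponents per field name,
    // so fields configured with different payload data do not share a chain.
    super(new PerFieldReuseStrategy());
  }
  @Override
  public TokenStreamComponents createComponents(String fieldName, Reader reader) {
    Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
    return new TokenStreamComponents(source, source);
  }
}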

TestBasics.java

@@ -24,11 +24,7 @@ import java.util.Collection;
 import java.util.Collections;
 import java.util.List;
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.MockAnalyzer;
-import org.apache.lucene.analysis.MockTokenizer;
-import org.apache.lucene.analysis.TokenFilter;
-import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.*;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
 import org.apache.lucene.document.Document;
@@ -100,11 +96,12 @@ public class TestBasics extends LuceneTestCase {
     }
   }
-  static final Analyzer simplePayloadAnalyzer = new Analyzer() {
+  static final Analyzer simplePayloadAnalyzer = new ReusableAnalyzerBase() {
     @Override
-    public TokenStream tokenStream(String fieldName, Reader reader) {
-      return new SimplePayloadFilter(new MockTokenizer(reader, MockTokenizer.SIMPLE, true));
+    public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+      Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
+      return new TokenStreamComponents(tokenizer, new SimplePayloadFilter(tokenizer));
     }
   };

TestWordDelimiterFilter.java

@@ -213,12 +213,13 @@ public class TestWordDelimiterFilter extends BaseTokenStreamTestCase {
     final CharArraySet protWords = new CharArraySet(TEST_VERSION_CURRENT, new HashSet<String>(Arrays.asList("NUTCH")), false);
     /* analyzer that uses whitespace + wdf */
-    Analyzer a = new Analyzer() {
+    Analyzer a = new ReusableAnalyzerBase() {
       @Override
-      public TokenStream tokenStream(String field, Reader reader) {
-        return new WordDelimiterFilter(
-            new MockTokenizer(reader, MockTokenizer.WHITESPACE, false),
-            flags, protWords);
+      public TokenStreamComponents createComponents(String field, Reader reader) {
+        Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+        return new TokenStreamComponents(tokenizer, new WordDelimiterFilter(
+            tokenizer,
+            flags, protWords));
       }
     };
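
For reference, an analyzer converted this way is driven exactly as before in BaseTokenStreamTestCase-style tests. The sketch below is not from the commit: the flags, the protected-words set, the input and the expected tokens are illustrative, and the import locations assume the package layout of this trunk revision:

import java.io.Reader;
import java.util.Arrays;
import java.util.HashSet;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.ReusableAnalyzerBase;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter;

public class WdfConversionSketchTest extends BaseTokenStreamTestCase {
  public void testCaseChangeSplit() throws Exception {
    // Split on case changes and emit the word parts; "NUTCH" stays protected from splitting.
    final int flags = WordDelimiterFilter.GENERATE_WORD_PARTS
        | WordDelimiterFilter.SPLIT_ON_CASE_CHANGE;
    final CharArraySet protWords = new CharArraySet(
        TEST_VERSION_CURRENT, new HashSet<String>(Arrays.asList("NUTCH")), false);
    Analyzer a = new ReusableAnalyzerBase() {
      @Override
      public TokenStreamComponents createComponents(String field, Reader reader) {
        Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
        return new TokenStreamComponents(tokenizer,
            new WordDelimiterFilter(tokenizer, flags, protWords));
      }
    };
    // With the flags above, a whitespace token "powerShot" splits into its two word parts.
    assertAnalyzesTo(a, "powerShot", new String[] { "power", "Shot" });
  }
}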