mirror of https://github.com/apache/lucene.git
LUCENE-3396: Converted simple Analyzers which got lost in merging
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1169654 13f79535-47bb-0310-9956-ffa450edef68
commit 3597bc4bf4
parent f7adf92edf
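Every hunk in this commit applies the same conversion: test analyzers that overrode Analyzer.tokenStream(String, Reader) now extend ReusableAnalyzerBase and build their chain in createComponents(String, Reader), returning a TokenStreamComponents that holds both the source Tokenizer and the outermost TokenStream. A minimal before/after sketch of that shape, assuming the trunk-era classes visible in the diff (ReusableAnalyzerBase, its nested TokenStreamComponents, and MockTokenizer from the test framework); ExampleAnalyzer is an illustrative name, not a class touched by this commit:

import java.io.Reader;

import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.ReusableAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;

// Before: each call to tokenStream() built a brand-new chain.
//
//   Analyzer a = new Analyzer() {
//     @Override
//     public TokenStream tokenStream(String fieldName, Reader reader) {
//       return new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
//     }
//   };
//
// After: ReusableAnalyzerBase builds the components once and reuses them,
// resetting the Tokenizer against each new Reader.
class ExampleAnalyzer extends ReusableAnalyzerBase {
  @Override
  public TokenStreamComponents createComponents(String fieldName, Reader reader) {
    // Keep the Tokenizer in its own variable: TokenStreamComponents needs it
    // so the cached chain can later be reset with a new Reader.
    Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
    TokenStream result = source; // filters, if any, would wrap 'source' here
    return new TokenStreamComponents(source, result);
  }
}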
@@ -120,6 +120,7 @@ public class TestPayloads extends LuceneTestCase {
 
         // now we add another document which has payloads for field f3 and verify if the SegmentMerger
         // enabled payloads for that field
+        analyzer = new PayloadAnalyzer(); // Clear payload state for each field
         writer = new IndexWriter(ram, newIndexWriterConfig( TEST_VERSION_CURRENT,
                                                             analyzer).setOpenMode(OpenMode.CREATE));
         d = new Document();
@@ -188,9 +189,9 @@ public class TestPayloads extends LuceneTestCase {
         // occurrences within two consecutive skip intervals
         int offset = 0;
         for (int i = 0; i < 2 * numDocs; i++) {
-            analyzer.setPayloadData(fieldName, payloadData, offset, 1);
+            analyzer = new PayloadAnalyzer(fieldName, payloadData, offset, 1);
             offset += numTerms;
-            writer.addDocument(d);
+            writer.addDocument(d, analyzer);
         }
 
         // make sure we create more than one segment to test merging
@@ -198,9 +199,9 @@ public class TestPayloads extends LuceneTestCase {
 
         // now we make sure to have different payload lengths next at the next skip point
         for (int i = 0; i < numDocs; i++) {
-            analyzer.setPayloadData(fieldName, payloadData, offset, i);
+            analyzer = new PayloadAnalyzer(fieldName, payloadData, offset, i);
             offset += i * numTerms;
-            writer.addDocument(d);
+            writer.addDocument(d, analyzer);
         }
 
         writer.optimize();
@@ -404,39 +405,37 @@ public class TestPayloads extends LuceneTestCase {
     /**
      * This Analyzer uses an WhitespaceTokenizer and PayloadFilter.
      */
-    private static class PayloadAnalyzer extends Analyzer {
+    private static class PayloadAnalyzer extends ReusableAnalyzerBase {
         Map<String,PayloadData> fieldToData = new HashMap<String,PayloadData>();
 
-        void setPayloadData(String field, byte[] data, int offset, int length) {
-            fieldToData.put(field, new PayloadData(0, data, offset, length));
+        public PayloadAnalyzer() {
+            super(new PerFieldReuseStrategy());
         }
 
-        void setPayloadData(String field, int numFieldInstancesToSkip, byte[] data, int offset, int length) {
-            fieldToData.put(field, new PayloadData(numFieldInstancesToSkip, data, offset, length));
+        public PayloadAnalyzer(String field, byte[] data, int offset, int length) {
+            super(new PerFieldReuseStrategy());
+            setPayloadData(field, data, offset, length);
+        }
+
+        void setPayloadData(String field, byte[] data, int offset, int length) {
+            fieldToData.put(field, new PayloadData(data, offset, length));
         }
 
         @Override
-        public TokenStream tokenStream(String fieldName, Reader reader) {
+        public TokenStreamComponents createComponents(String fieldName, Reader reader) {
             PayloadData payload = fieldToData.get(fieldName);
-            TokenStream ts = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
-            if (payload != null) {
-                if (payload.numFieldInstancesToSkip == 0) {
-                    ts = new PayloadFilter(ts, payload.data, payload.offset, payload.length);
-                } else {
-                    payload.numFieldInstancesToSkip--;
-                }
-            }
-            return ts;
+            Tokenizer ts = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+            TokenStream tokenStream = (payload != null) ?
+                new PayloadFilter(ts, payload.data, payload.offset, payload.length) : ts;
+            return new TokenStreamComponents(ts, tokenStream);
         }
 
         private static class PayloadData {
             byte[] data;
             int offset;
             int length;
-            int numFieldInstancesToSkip;
 
-            PayloadData(int skip, byte[] data, int offset, int length) {
-                numFieldInstancesToSkip = skip;
+            PayloadData(byte[] data, int offset, int length) {
                 this.data = data;
                 this.offset = offset;
                 this.length = length;
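One consequence of the reuse model shows up in the TestPayloads hunks: payload state can no longer be swapped on a shared analyzer between addDocument calls, so the test now constructs a fresh PayloadAnalyzer per document and passes it to writer.addDocument(d, analyzer), while the analyzer itself opts into per-field caching via PerFieldReuseStrategy so that fields with and without a PayloadFilter keep separate cached components. A small sketch of that constructor pattern under the same assumed APIs; FieldAwareAnalyzer is an illustrative name, not part of the commit:

import java.io.Reader;

import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.ReusableAnalyzerBase;
import org.apache.lucene.analysis.Tokenizer;

class FieldAwareAnalyzer extends ReusableAnalyzerBase {
  FieldAwareAnalyzer() {
    // Cache one TokenStreamComponents per field rather than one per thread,
    // so fields that receive different filter chains do not share state.
    super(new PerFieldReuseStrategy());
  }

  @Override
  public TokenStreamComponents createComponents(String fieldName, Reader reader) {
    Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
    return new TokenStreamComponents(source, source);
  }
}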
@@ -454,6 +453,7 @@ public class TestPayloads extends LuceneTestCase {
         private int offset;
         private int startOffset;
         PayloadAttribute payloadAtt;
+        CharTermAttribute termAttribute;
 
         public PayloadFilter(TokenStream in, byte[] data, int offset, int length) {
             super(in);
@@ -462,13 +462,18 @@
             this.offset = offset;
             this.startOffset = offset;
             payloadAtt = addAttribute(PayloadAttribute.class);
+            termAttribute = addAttribute(CharTermAttribute.class);
         }
 
         @Override
         public boolean incrementToken() throws IOException {
             boolean hasNext = input.incrementToken();
-            if (hasNext) {
-                if (offset + length <= data.length) {
+            if (!hasNext) {
+                return false;
+            }
+
+            // Some values of the same field are to have payloads and others not
+            if (offset + length <= data.length && !termAttribute.toString().endsWith("NO PAYLOAD")) {
                 Payload p = new Payload();
                 payloadAtt.setPayload(p);
                 p.setData(data, offset, length);
@@ -476,9 +481,8 @@
             } else {
                 payloadAtt.setPayload(null);
             }
-            }
 
-            return hasNext;
+            return true;
         }
 
         @Override
@@ -24,11 +24,7 @@ import java.util.Collection;
 import java.util.Collections;
 import java.util.List;
 
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.MockAnalyzer;
-import org.apache.lucene.analysis.MockTokenizer;
-import org.apache.lucene.analysis.TokenFilter;
-import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.*;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
 import org.apache.lucene.document.Document;
@@ -100,11 +96,12 @@ public class TestBasics extends LuceneTestCase {
     }
   }
 
-  static final Analyzer simplePayloadAnalyzer = new Analyzer() {
+  static final Analyzer simplePayloadAnalyzer = new ReusableAnalyzerBase() {
 
     @Override
-    public TokenStream tokenStream(String fieldName, Reader reader) {
-      return new SimplePayloadFilter(new MockTokenizer(reader, MockTokenizer.SIMPLE, true));
+    public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+      Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
+      return new TokenStreamComponents(tokenizer, new SimplePayloadFilter(tokenizer));
     }
 
   };
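The TestBasics hunk above and the TestWordDelimiterFilter hunk below share one detail worth calling out: the old anonymous analyzers could inline the tokenizer inside the filter constructor, but createComponents has to hand the Tokenizer to TokenStreamComponents separately so the reused chain can be reset per Reader, hence the local tokenizer variable. A sketch of that anonymous-subclass shape under the same assumed APIs, with SomeTokenFilter as a stand-in for SimplePayloadFilter or WordDelimiterFilter (it is not a class in this commit):

import java.io.IOException;
import java.io.Reader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.ReusableAnalyzerBase;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;

public class AnonymousAnalyzerSketch {
  // Pass-through stand-in filter so the sketch is self-contained.
  static final class SomeTokenFilter extends TokenFilter {
    SomeTokenFilter(TokenStream in) {
      super(in);
    }
    @Override
    public boolean incrementToken() throws IOException {
      return input.incrementToken();
    }
  }

  static final Analyzer a = new ReusableAnalyzerBase() {
    @Override
    public TokenStreamComponents createComponents(String fieldName, Reader reader) {
      // The Tokenizer stays in a local so it can be passed both to the filter
      // and to TokenStreamComponents, which uses it for per-Reader reset.
      Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
      return new TokenStreamComponents(tokenizer, new SomeTokenFilter(tokenizer));
    }
  };
}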
@@ -213,12 +213,13 @@ public class TestWordDelimiterFilter extends BaseTokenStreamTestCase {
     final CharArraySet protWords = new CharArraySet(TEST_VERSION_CURRENT, new HashSet<String>(Arrays.asList("NUTCH")), false);
 
     /* analyzer that uses whitespace + wdf */
-    Analyzer a = new Analyzer() {
+    Analyzer a = new ReusableAnalyzerBase() {
       @Override
-      public TokenStream tokenStream(String field, Reader reader) {
-        return new WordDelimiterFilter(
-            new MockTokenizer(reader, MockTokenizer.WHITESPACE, false),
-            flags, protWords);
+      public TokenStreamComponents createComponents(String field, Reader reader) {
+        Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+        return new TokenStreamComponents(tokenizer, new WordDelimiterFilter(
+            tokenizer,
+            flags, protWords));
       }
     };
 