From 3597bc4bf4b22eef4585de211bea7593d8b69035 Mon Sep 17 00:00:00 2001
From: Christopher John Male
Date: Mon, 12 Sep 2011 09:00:42 +0000
Subject: [PATCH] LUCENE-3396: Converted simple Analyzers which got lost in merging

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1169654 13f79535-47bb-0310-9956-ffa450edef68
---
 .../org/apache/lucene/index/TestPayloads.java | 72 ++++++++++---------
 .../lucene/search/spans/TestBasics.java       | 13 ++--
 .../TestWordDelimiterFilter.java              | 11 +--
 3 files changed, 49 insertions(+), 47 deletions(-)

diff --git a/lucene/src/test/org/apache/lucene/index/TestPayloads.java b/lucene/src/test/org/apache/lucene/index/TestPayloads.java
index c1ba6cb8dea..d68a8f8bbba 100644
--- a/lucene/src/test/org/apache/lucene/index/TestPayloads.java
+++ b/lucene/src/test/org/apache/lucene/index/TestPayloads.java
@@ -120,6 +120,7 @@ public class TestPayloads extends LuceneTestCase {
 
     // now we add another document which has payloads for field f3 and verify if the SegmentMerger
     // enabled payloads for that field
+    analyzer = new PayloadAnalyzer(); // Clear payload state for each field
     writer = new IndexWriter(ram, newIndexWriterConfig(
         TEST_VERSION_CURRENT, analyzer).setOpenMode(OpenMode.CREATE));
     d = new Document();
@@ -188,9 +189,9 @@ public class TestPayloads extends LuceneTestCase {
     // occurrences within two consecutive skip intervals
     int offset = 0;
     for (int i = 0; i < 2 * numDocs; i++) {
-      analyzer.setPayloadData(fieldName, payloadData, offset, 1);
+      analyzer = new PayloadAnalyzer(fieldName, payloadData, offset, 1);
       offset += numTerms;
-      writer.addDocument(d);
+      writer.addDocument(d, analyzer);
     }
 
     // make sure we create more than one segment to test merging
@@ -198,9 +199,9 @@
 
     // now we make sure to have different payload lengths next at the next skip point
     for (int i = 0; i < numDocs; i++) {
-      analyzer.setPayloadData(fieldName, payloadData, offset, i);
+      analyzer = new PayloadAnalyzer(fieldName, payloadData, offset, i);
       offset += i * numTerms;
-      writer.addDocument(d);
+      writer.addDocument(d, analyzer);
     }
 
     writer.optimize();
@@ -404,39 +405,37 @@ public class TestPayloads extends LuceneTestCase {
   /**
    * This Analyzer uses an WhitespaceTokenizer and PayloadFilter.
    */
-  private static class PayloadAnalyzer extends Analyzer {
+  private static class PayloadAnalyzer extends ReusableAnalyzerBase {
     Map<String, PayloadData> fieldToData = new HashMap<String, PayloadData>();
+
+    public PayloadAnalyzer() {
+      super(new PerFieldReuseStrategy());
+    }
 
-    void setPayloadData(String field, byte[] data, int offset, int length) {
-      fieldToData.put(field, new PayloadData(0, data, offset, length));
+    public PayloadAnalyzer(String field, byte[] data, int offset, int length) {
+      super(new PerFieldReuseStrategy());
+      setPayloadData(field, data, offset, length);
     }
 
-    void setPayloadData(String field, int numFieldInstancesToSkip, byte[] data, int offset, int length) {
-      fieldToData.put(field, new PayloadData(numFieldInstancesToSkip, data, offset, length));
+    void setPayloadData(String field, byte[] data, int offset, int length) {
+      fieldToData.put(field, new PayloadData(data, offset, length));
     }
 
     @Override
-    public TokenStream tokenStream(String fieldName, Reader reader) {
+    public TokenStreamComponents createComponents(String fieldName, Reader reader) {
       PayloadData payload = fieldToData.get(fieldName);
-      TokenStream ts = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
-      if (payload != null) {
-        if (payload.numFieldInstancesToSkip == 0) {
-          ts = new PayloadFilter(ts, payload.data, payload.offset, payload.length);
-        } else {
-          payload.numFieldInstancesToSkip--;
-        }
-      }
-      return ts;
+      Tokenizer ts = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+      TokenStream tokenStream = (payload != null) ?
+          new PayloadFilter(ts, payload.data, payload.offset, payload.length) : ts;
+      return new TokenStreamComponents(ts, tokenStream);
    }
 
    private static class PayloadData {
      byte[] data;
      int offset;
      int length;
-      int numFieldInstancesToSkip;
-
-      PayloadData(int skip, byte[] data, int offset, int length) {
-        numFieldInstancesToSkip = skip;
+
+      PayloadData(byte[] data, int offset, int length) {
        this.data = data;
        this.offset = offset;
        this.length = length;
@@ -454,6 +453,7 @@ public class TestPayloads extends LuceneTestCase {
    private int offset;
    private int startOffset;
    PayloadAttribute payloadAtt;
+    CharTermAttribute termAttribute;
 
    public PayloadFilter(TokenStream in, byte[] data, int offset, int length) {
      super(in);
@@ -462,23 +462,27 @@
      this.offset = offset;
      this.startOffset = offset;
      payloadAtt = addAttribute(PayloadAttribute.class);
+      termAttribute = addAttribute(CharTermAttribute.class);
    }
 
    @Override
    public boolean incrementToken() throws IOException {
      boolean hasNext = input.incrementToken();
-      if (hasNext) {
-        if (offset + length <= data.length) {
-          Payload p = new Payload();
-          payloadAtt.setPayload(p);
-          p.setData(data, offset, length);
-          offset += length;
-        } else {
-          payloadAtt.setPayload(null);
-        }
+      if (!hasNext) {
+        return false;
      }
-
-      return hasNext;
+
+      // Some values of the same field are to have payloads and others not
+      if (offset + length <= data.length && !termAttribute.toString().endsWith("NO PAYLOAD")) {
+        Payload p = new Payload();
+        payloadAtt.setPayload(p);
+        p.setData(data, offset, length);
+        offset += length;
+      } else {
+        payloadAtt.setPayload(null);
+      }
+
+      return true;
    }
 
    @Override
diff --git a/lucene/src/test/org/apache/lucene/search/spans/TestBasics.java b/lucene/src/test/org/apache/lucene/search/spans/TestBasics.java
index c3a30c51477..15448e5ba49 100644
--- a/lucene/src/test/org/apache/lucene/search/spans/TestBasics.java
+++ b/lucene/src/test/org/apache/lucene/search/spans/TestBasics.java
@@ -24,11 +24,7 @@ import java.util.Collection;
 import java.util.Collections;
 import java.util.List;
 
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.MockAnalyzer;
-import org.apache.lucene.analysis.MockTokenizer;
-import org.apache.lucene.analysis.TokenFilter;
-import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.*;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
 import org.apache.lucene.document.Document;
@@ -100,11 +96,12 @@ public class TestBasics extends LuceneTestCase {
    }
  }
 
-  static final Analyzer simplePayloadAnalyzer = new Analyzer() {
+  static final Analyzer simplePayloadAnalyzer = new ReusableAnalyzerBase() {
 
    @Override
-    public TokenStream tokenStream(String fieldName, Reader reader) {
-      return new SimplePayloadFilter(new MockTokenizer(reader, MockTokenizer.SIMPLE, true));
+    public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+      Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
+      return new TokenStreamComponents(tokenizer, new SimplePayloadFilter(tokenizer));
    }
 
  };
diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestWordDelimiterFilter.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestWordDelimiterFilter.java
index c12a4142cc7..52191ac68ea 100644
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestWordDelimiterFilter.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestWordDelimiterFilter.java
@@ -213,12 +213,13 @@ public class TestWordDelimiterFilter extends BaseTokenStreamTestCase {
    final CharArraySet protWords = new CharArraySet(TEST_VERSION_CURRENT, new HashSet<String>(Arrays.asList("NUTCH")), false);
 
    /* analyzer that uses whitespace + wdf */
-    Analyzer a = new Analyzer() {
+    Analyzer a = new ReusableAnalyzerBase() {
      @Override
-      public TokenStream tokenStream(String field, Reader reader) {
-        return new WordDelimiterFilter(
-            new MockTokenizer(reader, MockTokenizer.WHITESPACE, false),
-            flags, protWords);
+      public TokenStreamComponents createComponents(String field, Reader reader) {
+        Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+        return new TokenStreamComponents(tokenizer, new WordDelimiterFilter(
+            tokenizer,
+            flags, protWords));
      }
    };
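
Note (not part of the patch): every conversion above follows the same pattern. An override of Analyzer.tokenStream() is replaced by ReusableAnalyzerBase.createComponents(), which returns a TokenStreamComponents holding the source Tokenizer and the outermost TokenStream so the base class can cache and reuse them. Below is a minimal sketch of that pattern, using the test framework's MockTokenizer as the tests above do; the ExampleAnalyzers class and exampleAnalyzer field are made-up names for illustration, and the single-argument TokenStreamComponents constructor is assumed to be available as in other core analyzers.

    // Illustrative sketch only, not part of the patch above. The class and field
    // names (ExampleAnalyzers, exampleAnalyzer) are invented for this example;
    // ReusableAnalyzerBase, TokenStreamComponents and MockTokenizer are the types
    // used by the converted tests.
    import java.io.Reader;

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.MockTokenizer;
    import org.apache.lucene.analysis.ReusableAnalyzerBase;
    import org.apache.lucene.analysis.Tokenizer;

    public class ExampleAnalyzers {

      // With ReusableAnalyzerBase the whole analysis chain is built once in
      // createComponents(); the base class caches the returned components and
      // reuses them on later calls instead of building a new TokenStream each
      // time, as Analyzer.tokenStream() overrides used to do.
      static final Analyzer exampleAnalyzer = new ReusableAnalyzerBase() {
        @Override
        public TokenStreamComponents createComponents(String fieldName, Reader reader) {
          Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
          // TokenFilters, if any, would wrap 'tokenizer' here and be passed as the
          // second constructor argument, as in the converted tests above.
          return new TokenStreamComponents(tokenizer);
        }
      };
    }

The PayloadAnalyzer in TestPayloads additionally passes new PerFieldReuseStrategy() to the ReusableAnalyzerBase constructor, so a separate component chain is cached per field name rather than one per thread, which matters there because different fields are given different PayloadFilter configurations.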