add some javadocs to the test-framework analysis components

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1125165 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2011-05-19 23:02:52 +00:00
parent 897ad47877
commit ba7845ae74
3 changed files with 32 additions and 2 deletions

View File

@ -30,7 +30,15 @@ import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;
/**
* Base class for all Lucene unit tests that use TokenStreams.
* Base class for all Lucene unit tests that use TokenStreams.
* <p>
* When writing unit tests for analysis components, it's highly recommended
* to use the helper methods here (especially in conjunction with {@link MockAnalyzer} or
* {@link MockTokenizer}), as they contain many assertions and checks to
* catch bugs.
*
* @see MockAnalyzer
* @see MockTokenizer
*/
public abstract class BaseTokenStreamTestCase extends LuceneTestCase {
// some helpers to test Analyzers and TokenStreams:

View File

@ -27,6 +27,19 @@ import org.apache.lucene.util.automaton.CharacterRunAutomaton;
/**
* Analyzer for testing
* <p>
* This analyzer is a replacement for Whitespace/Simple/KeywordAnalyzers
* for unit tests. If you are testing a custom component such as a queryparser
* or analyzer-wrapper that consumes analysis streams, it's a great idea to test
* it with this analyzer instead. MockAnalyzer has the following behavior:
* <ul>
* <li>By default, the assertions in {@link MockTokenizer} are turned on for extra
* checks that the consumer is consuming properly. These checks can be disabled
* with {@link #setEnableChecks(boolean)}.
* <li>Payload data is randomly injected into the stream for more thorough testing
* of payloads.
* </ul>
* @see MockTokenizer
*/
public final class MockAnalyzer extends Analyzer {
private final CharacterRunAutomaton runAutomaton;

View File

@ -26,7 +26,16 @@ import org.apache.lucene.util.automaton.CharacterRunAutomaton;
import org.apache.lucene.util.automaton.RegExp;
/**
* Automaton-based tokenizer for testing. Optionally lowercases.
* Tokenizer for testing.
* <p>
* This tokenizer is a replacement for {@link #WHITESPACE}, {@link #SIMPLE}, and {@link #KEYWORD}
* tokenizers. If you are writing a component such as a TokenFilter, it's a great idea to test
* it wrapping this tokenizer instead for extra checks. This tokenizer has the following behavior:
* <ul>
* <li>An internal state-machine is used for checking consumer consistency. These checks can
* be disabled with {@link #setEnableChecks(boolean)}.
* <li>For convenience, optionally lowercases terms that it outputs.
* </ul>
*/
public class MockTokenizer extends Tokenizer {
/** Acts Similar to WhitespaceTokenizer */