mirror of https://github.com/apache/lucene.git
basic javadoc improvements, mostly simple descriptions where the class had nothing before
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1302752 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent 870c47f4d7
commit 790323780f
@@ -353,6 +353,9 @@ public class BalancedSegmentMergePolicy extends LogByteSizeMergePolicy {
   }
 
+  /**
+   * Specifies configuration parameters for BalancedSegmentMergePolicy.
+   */
  public static class MergePolicyParams {
    private int _numLargeSegments;
    private int _maxSmallSegments;
@@ -37,6 +37,9 @@ import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.CommandLineUtil;
 
+/**
+ * Command-line tool for extracting sub-files out of a compound file.
+ */
 public class CompoundFileExtractor {
 
   public static void main(String [] args) {
@@ -63,7 +66,7 @@ public class CompoundFileExtractor {
     }
 
     if (filename == null) {
-      System.out.println("Usage: org.apache.lucene.index.IndexReader [-extract] [-dir-impl X] <cfsfile>");
+      System.out.println("Usage: org.apache.lucene.index.CompoundFileExtractor [-extract] [-dir-impl X] <cfsfile>");
       return;
     }
@@ -23,7 +23,7 @@ import org.apache.lucene.store.FSDirectory;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.index.IndexReader;
 
-/*
+/**
  * Utility to get document frequency and total number of occurrences (sum of the tf for each doc) of a term.
  */
 public class GetTermInfo {
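The two statistics this javadoc names can be read straight off an IndexReader. A minimal usage sketch, assuming the 4.x-style DirectoryReader API; the field and term values are illustrative:

import java.io.File;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.FSDirectory;

public class TermInfoSketch {
  public static void main(String[] args) throws Exception {
    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(args[0])));
    Term term = new Term("body", "lucene");  // hypothetical field/term
    // document frequency: how many documents contain the term
    System.out.println("docFreq = " + reader.docFreq(term));
    // total occurrences: sum of the term frequency over all documents
    System.out.println("totalTermFreq = " + reader.totalTermFreq(term));
    reader.close();
  }
}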
@@ -20,7 +20,13 @@ import org.apache.lucene.index.DocValues.Type;
  * limitations under the License.
  */
 
-/** @lucene.experimental */
+/**
+ * Access to the Fieldable Info file that describes document fields and whether or
+ * not they are indexed. Each segment has a separate Fieldable Info file. Objects
+ * of this class are thread-safe for multiple readers, but only one thread can
+ * be adding documents at a time, with no other reader or writer threads
+ * accessing this object.
+ **/
 public final class FieldInfo {
   public final String name;
   public final int number;
@@ -26,11 +26,8 @@ import java.util.TreeMap;
 
 import org.apache.lucene.index.FieldInfo.IndexOptions;
 
-/** Access to the Field Info file that describes document fields and whether or
- * not they are indexed. Each segment has a separate Field Info file. Objects
- * of this class are thread-safe for multiple readers, but only one thread can
- * be adding documents at a time, with no other reader or writer threads
- * accessing this object.
+/**
+ * Collection of {@link FieldInfo}s (accessible by number or by name).
  * @lucene.experimental
  */
 public final class FieldInfos implements Iterable<FieldInfo> {
@@ -28,6 +28,10 @@ import org.apache.lucene.store.IOContext;
 import org.apache.lucene.util.Bits;
 
 /**
+ * IndexReader implementation over a single segment.
+ * <p>
+ * Instances pointing to the same segment (but with different deletes, etc)
+ * may share the same core data.
  * @lucene.experimental
  */
 public final class SegmentReader extends AtomicReader {
@@ -24,6 +24,7 @@ import org.apache.lucene.util.InfoStream;
 import org.apache.lucene.util.MutableBits;
 
 /**
+ * Holder class for common parameters used during write.
  * @lucene.experimental
  */
 public class SegmentWriteState {
@@ -35,7 +35,7 @@ import java.util.ArrayList;
  * separates how many segments are merged at once ({@link
  * #setMaxMergeAtOnce}) from how many segments are allowed
  * per tier ({@link #setSegmentsPerTier}). This merge
- * policy also does not over-merge (ie, cascade merges).
+ * policy also does not over-merge (i.e. cascade merges).
  *
  * <p>For normal merging, this policy first computes a
  * "budget" of how many segments are allowed by be in the
@@ -43,8 +43,8 @@ import java.util.ArrayList;
  * sorts segments by decreasing size (pro-rating by percent
  * deletes), and then finds the least-cost merge. Merge
  * cost is measured by a combination of the "skew" of the
- * merge (size of largest seg divided by smallest seg),
- * total merge size and pct deletes reclaimed,
+ * merge (size of largest segment divided by smallest segment),
+ * total merge size and percent deletes reclaimed,
  * so that merges with lower skew, smaller size
  * and those reclaiming more deletes, are
  * favored.
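To make the cost function described in that javadoc concrete, here is an illustrative sketch; it is not the actual TieredMergePolicy code, and the weights and exponents are invented for the example:

public final class MergeCostSketch {
  /** Lower cost = more attractive merge candidate. */
  public static double cost(long[] segmentBytes, double pctDeletesReclaimed) {
    long largest = Long.MIN_VALUE, smallest = Long.MAX_VALUE, total = 0;
    for (long bytes : segmentBytes) {
      largest = Math.max(largest, bytes);
      smallest = Math.min(smallest, bytes);
      total += bytes;
    }
    double skew = (double) largest / smallest;          // 1.0 = perfectly balanced merge
    double undeletedRatio = 1.0 - pctDeletesReclaimed;  // shrinks as more deletes are reclaimed
    // Favor low skew, smaller total size, and more reclaimed deletes:
    return skew * Math.pow(total, 0.05) * Math.pow(undeletedRatio, 2.0);
  }
}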
@@ -32,8 +32,7 @@ import java.util.Set;
 
 
 /**
- *
- *
+ * Base class for filtering a SpanQuery based on the position of a match.
  **/
 public abstract class SpanPositionCheckQuery extends SpanQuery implements Cloneable {
   protected SpanQuery match;
@@ -21,7 +21,12 @@ import java.io.IOException;
 
 import org.apache.lucene.util.BytesRef;
 
-/** @lucene.experimental */
+/**
+ * DataInput backed by a byte array.
+ * <b>WARNING:</b> This class omits most low-level checks,
+ * so be sure to test heavily with assertions enabled.
+ * @lucene.experimental
+ */
 public final class ByteArrayDataInput extends DataInput {
 
   private byte[] bytes;
@@ -20,6 +20,9 @@ package org.apache.lucene.store;
 import org.apache.lucene.util.BytesRef;
 
 /**
+ * DataOutput backed by a byte array.
+ * <b>WARNING:</b> This class omits most low-level checks,
+ * so be sure to test heavily with assertions enabled.
  * @lucene.experimental
  */
 public class ByteArrayDataOutput extends DataOutput {
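A round-trip sketch for the pair of classes in this hunk and the previous one; since low-level checks are omitted, the caller is responsible for sizing the buffer:

import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ByteArrayDataOutput;

public class ByteArrayDataDemo {
  public static void main(String[] args) throws Exception {
    byte[] buffer = new byte[64];  // no bounds checks: make it big enough
    ByteArrayDataOutput out = new ByteArrayDataOutput(buffer);
    out.writeVInt(1234);
    out.writeString("lucene");

    ByteArrayDataInput in = new ByteArrayDataInput(buffer);
    System.out.println(in.readVInt());    // 1234
    System.out.println(in.readString());  // lucene
  }
}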
@@ -18,6 +18,7 @@ package org.apache.lucene.util;
  */
 
 /**
+ * Interface for Bitset-like structures.
  * @lucene.experimental
  */
@@ -27,6 +28,9 @@ public interface Bits {
 
   public static final Bits[] EMPTY_ARRAY = new Bits[0];
 
+  /**
+   * Bits impl of the specified length with all bits set.
+   */
   public static class MatchAllBits implements Bits {
     final int len;
 
@@ -43,6 +47,9 @@ public interface Bits {
     }
   }
 
+  /**
+   * Bits impl of the specified length with no bits set.
+   */
   public static class MatchNoBits implements Bits {
     final int len;
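A minimal sketch of the interface and the two trivial implementations added above; Bits exposes just get(index) and length():

import org.apache.lucene.util.Bits;

public class BitsDemo {
  public static void main(String[] args) {
    Bits all = new Bits.MatchAllBits(8);
    Bits none = new Bits.MatchNoBits(8);
    System.out.println(all.get(3));    // true
    System.out.println(none.get(3));   // false
    System.out.println(all.length());  // 8
  }
}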
@@ -27,6 +27,9 @@ import org.apache.lucene.store.DataInput;
 import org.apache.lucene.store.DataOutput;
 
 /**
+ * Utility class for reading and writing versioned headers.
+ * This is useful to ensure that a file is in the format
+ * you think it is.
  * @lucene.experimental
  */
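A sketch of the versioned-header pattern this describes: write a named header plus a version, then verify both when reading the file back. The codec name and file name here are hypothetical:

import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.CodecUtil;

public class HeaderDemo {
  static final String CODEC_NAME = "MyFormat";  // hypothetical format name
  static final int VERSION_START = 0;
  static final int VERSION_CURRENT = 0;

  public static void main(String[] args) throws Exception {
    Directory dir = new RAMDirectory();
    IndexOutput out = dir.createOutput("demo.dat", IOContext.DEFAULT);
    CodecUtil.writeHeader(out, CODEC_NAME, VERSION_CURRENT);
    out.writeVInt(42);  // payload
    out.close();

    IndexInput in = dir.openInput("demo.dat", IOContext.DEFAULT);
    // Fails if the name or the version range does not match what was written.
    int version = CodecUtil.checkHeader(in, CODEC_NAME, VERSION_START, VERSION_CURRENT);
    System.out.println("version=" + version + " payload=" + in.readVInt());
    in.close();
  }
}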
@@ -49,10 +49,18 @@ import org.apache.lucene.util._TestUtil;
 public abstract class BaseTokenStreamTestCase extends LuceneTestCase {
   // some helpers to test Analyzers and TokenStreams:
 
+  /**
+   * Attribute that records if it was cleared or not. This is used
+   * for testing that clearAttributes() was called correctly.
+   */
   public static interface CheckClearAttributesAttribute extends Attribute {
     boolean getAndResetClearCalled();
   }
 
+  /**
+   * Attribute that records if it was cleared or not. This is used
+   * for testing that clearAttributes() was called correctly.
+   */
   public static final class CheckClearAttributesAttributeImpl extends AttributeImpl implements CheckClearAttributesAttribute {
     private boolean clearCalled = false;
@@ -25,7 +25,7 @@ import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 
 /**
- * emits a canned set of {@link Token}
+ * TokenStream from a canned list of Tokens.
  */
 public final class CannedTokenStream extends TokenStream {
   private final Token[] tokens;
@@ -48,6 +48,9 @@ import java.io.IOException;
 import java.util.HashMap;
 import java.util.Map;
 
+/**
+ * Base test class for testing Unicode collation.
+ */
 public abstract class CollationTestBase extends LuceneTestCase {
 
   protected String firstRangeBeginningOriginal = "\u062F";
@@ -23,6 +23,9 @@ import java.util.Random;
 import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
 import org.apache.lucene.index.Payload;
 
+/**
+ * TokenFilter that adds random fixed-length payloads.
+ */
 public final class MockFixedLengthPayloadFilter extends TokenFilter {
   private final PayloadAttribute payloadAtt = addAttribute(PayloadAttribute.class);
   private final Random random;
@@ -23,6 +23,9 @@ import java.util.Random;
 import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
 import org.apache.lucene.index.Payload;
 
+/**
+ * TokenFilter that adds random variable-length payloads.
+ */
 public final class MockVariableLengthPayloadFilter extends TokenFilter {
   private static final int MAXLENGTH = 129;
@@ -19,6 +19,11 @@ package org.apache.lucene.index;
 
 import org.apache.lucene.store.BufferedIndexInput;
 
+// TODO: what is this used for? just testing BufferedIndexInput?
+// if so it should be pkg-private. otherwise its a dup of ByteArrayIndexInput?
+/**
+ * IndexInput backed by a byte[] for testing.
+ */
 public class MockIndexInput extends BufferedIndexInput {
   private byte[] buffer;
   private int pointer = 0;
@@ -26,6 +26,9 @@ import java.util.Map;
 
 import org.apache.lucene.util._TestUtil;
 
+/**
+ * MergePolicy that makes random decisions for testing.
+ */
 public class MockRandomMergePolicy extends MergePolicy {
   private final Random random;
@@ -30,6 +30,9 @@ import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.LuceneTestCase;
 
+/**
+ * Utility class for asserting expected hits in tests.
+ */
 public class CheckHits {
 
   /**
@@ -118,6 +121,9 @@ public class CheckHits {
     }
   }
 
+  /**
+   * Just collects document ids into a set.
+   */
   public static class SetCollector extends Collector {
     final Set<Integer> bag;
     public SetCollector(Set<Integer> bag) {
@@ -41,9 +41,9 @@ import org.apache.lucene.util._TestUtil;
 
 import static org.apache.lucene.util.LuceneTestCase.TEST_VERSION_CURRENT;
 
-
 /**
+ * Utility class for sanity-checking queries.
  */
 public class QueryUtils {
 
   /** Check the types of things query objects should be able to do. */
@@ -19,6 +19,10 @@ package org.apache.lucene.store;
 
 import java.io.IOException;
 
+/**
+ * Used by MockDirectoryWrapper to wrap another factory
+ * and track open locks.
+ */
 public class MockLockFactoryWrapper extends LockFactory {
   MockDirectoryWrapper dir;
   LockFactory delegate;
@@ -21,6 +21,9 @@ import java.io.IOException;
 import org.apache.lucene.store.DataInput;
 import org.apache.lucene.store.IndexOutput;
 
+/**
+ * Intentionally slow IndexOutput for testing.
+ */
 public class ThrottledIndexOutput extends IndexOutput {
   public static final int DEFAULT_MIN_WRITTEN_BYTES = 1024;
   private final int bytesPerSecond;
@@ -63,6 +63,9 @@ import org.apache.lucene.search.TopDocs;
 import org.apache.lucene.store.Directory;
 import org.junit.Assert;
 
+/**
+ * General utility methods for Lucene unit tests.
+ */
 public class _TestUtil {
 
   /** Returns temp dir, based on String arg in its name;
@@ -29,6 +29,10 @@ import java.util.Arrays;
 import java.util.List;
 import java.util.Locale;
 
+/**
+ * In-memory structure for the dictionary (.dic) and affix (.aff)
+ * data of a hunspell dictionary.
+ */
 public class HunspellDictionary {
 
   static final HunspellWord NOFLAGS = new HunspellWord();
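A loading sketch, assuming the (affix, dictionary, version) constructor this module had at the time; the file paths are hypothetical:

import java.io.FileInputStream;
import java.io.InputStream;

import org.apache.lucene.analysis.hunspell.HunspellDictionary;
import org.apache.lucene.util.Version;

public class HunspellLoadSketch {
  public static void main(String[] args) throws Exception {
    InputStream affix = new FileInputStream("en_US.aff");  // hypothetical path
    InputStream words = new FileInputStream("en_US.dic");  // hypothetical path
    HunspellDictionary dictionary = new HunspellDictionary(affix, words, Version.LUCENE_40);
    System.out.println("dictionary loaded: " + dictionary);
  }
}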
@@ -19,6 +19,9 @@ package org.apache.lucene.analysis.hunspell;
 
 import java.util.Arrays;
 
+/**
+ * A dictionary (.dic) entry with its associated flags.
+ */
 public class HunspellWord {
 
   private final char flags[]; // sorted, can we represent more concisely?
@@ -25,7 +25,8 @@ import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 
 /**
- *
+ * Tokenizer for path-like hierarchies.
+ * <p>
  * Take something like:
  *
  * <pre>
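A consumption sketch: for an input like "/usr/local/lib" this tokenizer emits the successive prefixes "/usr", "/usr/local", "/usr/local/lib", following the usual TokenStream reset/end/close protocol:

import java.io.StringReader;

import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.path.PathHierarchyTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class PathTokensSketch {
  public static void main(String[] args) throws Exception {
    Tokenizer tok = new PathHierarchyTokenizer(new StringReader("/usr/local/lib"));
    CharTermAttribute term = tok.addAttribute(CharTermAttribute.class);
    tok.reset();
    while (tok.incrementToken()) {
      System.out.println(term.toString());  // one path prefix per token
    }
    tok.end();
    tok.close();
  }
}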
@@ -27,7 +27,8 @@ import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 
 /**
- *
+ * Tokenizer for domain-like hierarchies.
+ * <p>
  * Take something like:
  *
  * <pre>
@@ -182,6 +182,9 @@ public final class TeeSinkTokenFilter extends TokenFilter {
     }
   }
 
+  /**
+   * TokenStream output from a tee with optional filtering.
+   */
   public static final class SinkTokenStream extends TokenStream {
     private final List<AttributeSource.State> cachedStates = new LinkedList<AttributeSource.State>();
     private AttributeSource.State finalState;
@@ -20,6 +20,9 @@ package org.apache.lucene.analysis.sinks;
 import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
 import org.apache.lucene.util.AttributeSource;
 
+/**
+ * Adds a token to the sink if it has a specific type.
+ */
 public class TokenTypeSinkFilter extends TeeSinkTokenFilter.SinkFilter {
   private String typeToMatch;
   private TypeAttribute typeAtt;
@@ -22,7 +22,10 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import java.io.Reader;
 import java.io.IOException;
 
-/** @lucene.internal */
+/**
+ * Internal interface for supporting versioned grammars.
+ * @lucene.internal
+ */
 public interface StandardTokenizerInterface {
 
   /** This character denotes the end of file */
@@ -31,6 +31,9 @@ import org.apache.lucene.analysis.kuromoji.dict.Dictionary;
 // TODO: would be nice to show 2nd best path in a diff't
 // color...
 
+/**
+ * Outputs the dot (graphviz) string for the viterbi lattice.
+ */
 public class GraphvizFormatter {
 
   private final static String BOS_LABEL = "BOS";
@@ -33,6 +33,9 @@ import org.apache.lucene.analysis.util.CharArraySet;
 import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
 import org.apache.lucene.util.Version;
 
+/**
+ * Analyzer for Japanese that uses morphological analysis.
+ */
 public class KuromojiAnalyzer extends StopwordAnalyzerBase {
   private final Mode mode;
   private final Set<String> stoptags;
@@ -55,6 +55,9 @@ import org.apache.lucene.util.fst.FST;
  * penalties to the long tokens. If so, and the Mode is
  * SEARCH_WITH_COMPOUND, we output the alternate
  * segmentation as well. */
+/**
+ * Tokenizer for Japanese that uses morphological analysis.
+ */
 public final class KuromojiTokenizer extends Tokenizer {
 
   public static enum Mode {
@@ -20,6 +20,9 @@ package org.apache.lucene.analysis.kuromoji;
 import org.apache.lucene.analysis.kuromoji.KuromojiTokenizer.Type;
 import org.apache.lucene.analysis.kuromoji.dict.Dictionary;
 
+/**
+ * Analyzed token with morphological data from its dictionary.
+ */
 public class Token {
   private final Dictionary dictionary;
@@ -32,6 +32,9 @@ import org.apache.lucene.util.CodecUtil;
 import org.apache.lucene.util.IntsRef;
 import org.apache.lucene.util.IOUtils;
 
+/**
+ * Base class for a binary-encoded in-memory dictionary.
+ */
 public abstract class BinaryDictionary implements Dictionary {
 
   public static final String DICT_FILENAME_SUFFIX = "$buffer.dat";
@@ -26,6 +26,9 @@ import org.apache.lucene.store.InputStreamDataInput;
 import org.apache.lucene.util.CodecUtil;
 import org.apache.lucene.util.IOUtils;
 
+/**
+ * Character category data.
+ */
 public final class CharacterDefinition {
 
   public static final String FILENAME_SUFFIX = ".dat";
@@ -26,6 +26,9 @@ import org.apache.lucene.store.InputStreamDataInput;
 import org.apache.lucene.util.CodecUtil;
 import org.apache.lucene.util.IOUtils;
 
+/**
+ * n-gram connection cost data
+ */
 public final class ConnectionCosts {
 
   public static final String FILENAME_SUFFIX = ".dat";
@@ -17,6 +17,10 @@ package org.apache.lucene.analysis.kuromoji.dict;
  * limitations under the License.
  */
 
+/**
+ * Dictionary interface for retrieving morphological data
+ * by id.
+ */
 public interface Dictionary {
 
   public static final String INTERNAL_SEPARATOR = "\u0000";
@@ -26,6 +26,10 @@ import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.fst.FST;
 import org.apache.lucene.util.fst.PositiveIntOutputs;
 
+/**
+ * Binary dictionary implementation for a known-word dictionary model:
+ * Words are encoded into an FST mapping to a list of wordIDs.
+ */
 public final class TokenInfoDictionary extends BinaryDictionary {
 
   public static final String FST_FILENAME_SUFFIX = "$fst.dat";
@@ -22,6 +22,13 @@ import java.io.IOException;
 import org.apache.lucene.util.fst.FST;
 import org.apache.lucene.util.fst.FST.Arc;
 
+/**
+ * Thin wrapper around an FST with root-arc caching for Japanese.
+ * <p>
+ * Depending upon fasterButMoreRam, either just kana (191 arcs),
+ * or kana and han (28,607 arcs) are cached. The latter offers
+ * additional performance at the cost of more RAM.
+ */
 public final class TokenInfoFST {
   private final FST<Long> fst;
@@ -19,6 +19,9 @@ package org.apache.lucene.analysis.kuromoji.dict;
 
 import java.io.IOException;
 
+/**
+ * Dictionary for unknown-word handling.
+ */
 public final class UnknownDictionary extends BinaryDictionary {
 
   private final CharacterDefinition characterDefinition = CharacterDefinition.getInstance();
@@ -34,6 +34,10 @@ import org.apache.lucene.util.fst.Builder;
 import org.apache.lucene.util.fst.FST;
 import org.apache.lucene.util.fst.PositiveIntOutputs;
 
+/**
+ * Class for building a User Dictionary.
+ * This class allows for custom segmentation of phrases.
+ */
 public final class UserDictionary implements Dictionary {
 
   // phrase text -> phrase ID
@@ -21,6 +21,9 @@ import org.apache.lucene.analysis.kuromoji.Token;
 import org.apache.lucene.util.AttributeImpl;
 import org.apache.lucene.util.AttributeReflector;
 
+/**
+ * Attribute for {@link Token#getBaseForm()}.
+ */
 public class BaseFormAttributeImpl extends AttributeImpl implements BaseFormAttribute, Cloneable {
   private Token token;
@@ -22,6 +22,9 @@ import org.apache.lucene.analysis.kuromoji.util.ToStringUtil;
 import org.apache.lucene.util.AttributeImpl;
 import org.apache.lucene.util.AttributeReflector;
 
+/**
+ * Attribute for Kuromoji inflection data.
+ */
 public class InflectionAttributeImpl extends AttributeImpl implements InflectionAttribute, Cloneable {
   private Token token;
@@ -22,6 +22,9 @@ import org.apache.lucene.analysis.kuromoji.util.ToStringUtil;
 import org.apache.lucene.util.AttributeImpl;
 import org.apache.lucene.util.AttributeReflector;
 
+/**
+ * Attribute for {@link Token#getPartOfSpeech()}.
+ */
 public class PartOfSpeechAttributeImpl extends AttributeImpl implements PartOfSpeechAttribute, Cloneable {
   private Token token;
@@ -22,6 +22,9 @@ import org.apache.lucene.analysis.kuromoji.util.ToStringUtil;
 import org.apache.lucene.util.AttributeImpl;
 import org.apache.lucene.util.AttributeReflector;
 
+/**
+ * Attribute for Kuromoji reading data
+ */
 public class ReadingAttributeImpl extends AttributeImpl implements ReadingAttribute, Cloneable {
   private Token token;
@@ -21,6 +21,9 @@ import java.util.ArrayList;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
+/**
+ * Utility class for parsing CSV text
+ */
 public final class CSVUtil {
   private static final char QUOTE = '"';
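A usage sketch, assuming the static parse(String) helper present in the kuromoji sources; quoted fields may themselves contain commas:

import org.apache.lucene.analysis.kuromoji.util.CSVUtil;

public class CsvParseSketch {
  public static void main(String[] args) {
    String[] fields = CSVUtil.parse("foo,\"bar,baz\",qux");
    for (String field : fields) {
      System.out.println(field);  // foo | bar,baz | qux
    }
  }
}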
@@ -19,6 +19,10 @@ package org.apache.lucene.analysis.kuromoji.util;
 
 import java.util.HashMap;
 
+/**
+ * Utility class for english translations of morphological data,
+ * used only for debugging.
+ */
 public class ToStringUtil {
   // a translation map for parts of speech, only used for reflectWith
   private static final HashMap<String,String> posTranslations = new HashMap<String,String>();
@@ -25,6 +25,9 @@ import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 
+/**
+ * Filter for DoubleMetaphone (supporting secondary codes)
+ */
 public final class DoubleMetaphoneFilter extends TokenFilter {
 
   private static final String TOKEN_TYPE = "DoubleMetaphone";
@@ -18,8 +18,7 @@ package org.apache.lucene.benchmark;
 
 
 /**
- *
- *
+ * Various benchmarking constants (mostly defaults)
  **/
 public class Constants
 {
@@ -49,6 +49,9 @@ public class DirContentSource extends ContentSource {
     ParsePosition pos;
   }
 
+  /**
+   * Iterator over the files in the directory
+   */
   public static class Iterator implements java.util.Iterator<File> {
 
     static class Comparator implements java.util.Comparator<File> {
@@ -29,9 +29,9 @@ import org.apache.lucene.util.Version;
 import com.ibm.icu.text.RuleBasedNumberFormat;
 
 /**
- *
- *
- **/
+ * Creates queries whose content is a spelled-out <code>long</code> number
+ * starting from <code>{@link Long#MIN_VALUE} + 10</code>.
+ */
 public class LongToEnglishQueryMaker implements QueryMaker {
   long counter = Long.MIN_VALUE + 10;
   protected QueryParser parser;
@@ -21,6 +21,9 @@ import org.apache.lucene.benchmark.byTask.PerfRunData;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.store.Directory;
 
+/**
+ * Opens a reader and prints basic statistics.
+ */
 public class PrintReaderTask extends PerfTask {
   private String userData = null;
@@ -33,8 +33,7 @@ import java.util.Set;
 
 
 /**
- *
- *
+ * Command-line tool for doing a TREC evaluation run.
  **/
 public class QueryDriver {
   public static void main(String[] args) throws Exception {
@@ -19,6 +19,11 @@ package org.apache.lucene.search.spell;
 
 import java.util.Arrays;
 
+/**
+ * Similarity measure for short strings such as person names.
+ * <p>
+ * @see <a href="http://en.wikipedia.org/wiki/Jaro%E2%80%93Winkler_distance">http://en.wikipedia.org/wiki/Jaro%E2%80%93Winkler_distance</a>
+ */
 public class JaroWinklerDistance implements StringDistance {
 
   private float threshold = 0.7f;
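A minimal usage sketch: StringDistance.getDistance returns a similarity in [0,1], where 1 means identical strings:

import org.apache.lucene.search.spell.JaroWinklerDistance;
import org.apache.lucene.search.spell.StringDistance;

public class NameDistanceSketch {
  public static void main(String[] args) {
    StringDistance distance = new JaroWinklerDistance();
    // Transposed characters still score high, which suits person names:
    System.out.println(distance.getDistance("martha", "marhta"));
    System.out.println(distance.getDistance("martha", "jones"));  // much lower
  }
}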
@@ -23,10 +23,17 @@ import java.util.Comparator;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefIterator;
 
+/**
+ * Interface for enumerating term,weight pairs.
+ */
 public interface TermFreqIterator extends BytesRefIterator {
 
   public long weight();
 
+  /**
+   * Wraps a BytesRefIterator as a TermFreqIterator, with all weights
+   * set to <code>1</code>
+   */
   public static class TermFreqIteratorWrapper implements TermFreqIterator {
     private BytesRefIterator wrapped;
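A consumption sketch for the interface above: next() advances the underlying BytesRefIterator, and weight() applies to the term most recently returned. This assumes the helper sits in the same package as TermFreqIterator:

import java.io.IOException;

import org.apache.lucene.util.BytesRef;

public class TermFreqDumper {
  public static void dump(TermFreqIterator it) throws IOException {
    BytesRef term;
    while ((term = it.next()) != null) {
      System.out.println(term.utf8ToString() + " => " + it.weight());
    }
  }
}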