mirror of https://github.com/apache/lucene.git
Merging r1073114 through r1074014 into realtime branch
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/realtime_search@1074015 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
commit
768f61a62b
|
@ -739,6 +739,13 @@ Bug fixes
|
|||
* LUCENE-2891: IndexWriterConfig did not accept -1 in setReaderTermIndexDivisor,
|
||||
which can be used to prevent loading the terms index into memory. (Shai Erera)
|
||||
|
||||
* LUCENE-2937: Encoding a float into a byte (e.g. encoding field norms during
|
||||
indexing) had an underflow detection bug that caused floatToByte(f)==0 where
|
||||
f was greater than 0, but slightly less than byteToFloat(1). This meant that
|
||||
certain very small field norms (index_boost * length_norm) could have
|
||||
been rounded down to 0 instead of being rounded up to the smallest
|
||||
positive number. (yonik)
|
||||
|
||||
New features
|
||||
|
||||
* LUCENE-2128: Parallelized fetching document frequencies during weight
|
||||
|
@ -1055,7 +1062,7 @@ Documentation
|
|||
(Adriano Crestani via Robert Muir)
|
||||
|
||||
* LUCENE-2894: Use google-code-prettify for syntax highlighting in javadoc.
|
||||
(Koji Sekiguchi)
|
||||
(Shinichiro Abe, Koji Sekiguchi)
|
||||
|
||||
================== Release 2.9.4 / 3.0.3 2010-12-03 ====================
|
||||
|
||||
|
|
|
@ -79,6 +79,13 @@ final class DocFieldProcessor extends DocConsumer {
|
|||
// FreqProxTermsWriter does this with
|
||||
// FieldInfo.storePayload.
|
||||
final String fileName = IndexFileNames.segmentFileName(state.segmentName, "", IndexFileNames.FIELD_INFOS_EXTENSION);
|
||||
|
||||
// If this segment only has docs that hit non-aborting exceptions,
|
||||
// then no term vectors files will have been written; therefore we
|
||||
// need to update the fieldInfos and clear the term vectors bits
|
||||
if (!state.hasVectors) {
|
||||
state.fieldInfos.clearVectors();
|
||||
}
|
||||
state.fieldInfos.write(state.directory, fileName);
|
||||
}
|
||||
|
||||
|
|
|
@ -102,7 +102,6 @@ public final class FieldInfos implements Iterable<FieldInfo> {
|
|||
return byNumber.size();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Iterator<FieldInfo> iterator() {
|
||||
return byNumber.values().iterator();
|
||||
}
|
||||
|
@ -404,6 +403,14 @@ public final class FieldInfos implements Iterable<FieldInfo> {
|
|||
return false;
|
||||
}
|
||||
|
||||
void clearVectors() {
|
||||
for (FieldInfo fi : this) {
|
||||
fi.storeTermVector = false;
|
||||
fi.storeOffsetWithTermVector = false;
|
||||
fi.storePositionWithTermVector = false;
|
||||
}
|
||||
}
|
||||
|
||||
public boolean hasNorms() {
|
||||
for (FieldInfo fi : this) {
|
||||
if (!fi.omitNorms) {
|
||||
|
|
|
@ -39,7 +39,7 @@ public class SmallFloat {
|
|||
int fzero = (63-zeroExp)<<numMantissaBits;
|
||||
int bits = Float.floatToRawIntBits(f);
|
||||
int smallfloat = bits >> (24-numMantissaBits);
|
||||
if (smallfloat < fzero) {
|
||||
if (smallfloat <= fzero) {
|
||||
return (bits<=0) ?
|
||||
(byte)0 // negative numbers and zero both map to 0 byte
|
||||
:(byte)1; // underflow is mapped to smallest non-zero number.
|
||||
|
@ -75,7 +75,7 @@ public class SmallFloat {
|
|||
public static byte floatToByte315(float f) {
|
||||
int bits = Float.floatToRawIntBits(f);
|
||||
int smallfloat = bits >> (24-3);
|
||||
if (smallfloat < (63-15)<<3) {
|
||||
if (smallfloat <= ((63-15)<<3)) {
|
||||
return (bits<=0) ? (byte)0 : (byte)1;
|
||||
}
|
||||
if (smallfloat >= ((63-15)<<3) + 0x100) {
|
||||
|
@ -103,7 +103,7 @@ public class SmallFloat {
|
|||
public static byte floatToByte52(float f) {
|
||||
int bits = Float.floatToRawIntBits(f);
|
||||
int smallfloat = bits >> (24-5);
|
||||
if (smallfloat < (63-2)<<5) {
|
||||
if (smallfloat <= (63-2)<<5) {
|
||||
return (bits<=0) ? (byte)0 : (byte)1;
|
||||
}
|
||||
if (smallfloat >= ((63-2)<<5) + 0x100) {
|
||||
|
|
|
@ -83,7 +83,7 @@ public class Builder<T> {
|
|||
@SuppressWarnings("unchecked") final UnCompiledNode<T>[] f = (UnCompiledNode<T>[]) new UnCompiledNode[10];
|
||||
frontier = f;
|
||||
for(int idx=0;idx<frontier.length;idx++) {
|
||||
frontier[idx] = new UnCompiledNode<T>(this);
|
||||
frontier[idx] = new UnCompiledNode<T>(this, idx);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -201,7 +201,7 @@ public class Builder<T> {
|
|||
// undecided on whether to prune it. later, it
|
||||
// will be either compiled or pruned, so we must
|
||||
// allocate a new node:
|
||||
frontier[idx] = new UnCompiledNode<T>(this);
|
||||
frontier[idx] = new UnCompiledNode<T>(this, idx);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -292,7 +292,7 @@ public class Builder<T> {
|
|||
new UnCompiledNode[ArrayUtil.oversize(input.length+1, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
|
||||
System.arraycopy(frontier, 0, next, 0, frontier.length);
|
||||
for(int idx=frontier.length;idx<next.length;idx++) {
|
||||
next[idx] = new UnCompiledNode<T>(this);
|
||||
next[idx] = new UnCompiledNode<T>(this, idx);
|
||||
}
|
||||
frontier = next;
|
||||
}
|
||||
|
@ -424,12 +424,22 @@ public class Builder<T> {
|
|||
boolean isFinal;
|
||||
int inputCount;
|
||||
|
||||
/** This node's depth, starting from the automaton root. */
|
||||
final int depth;
|
||||
|
||||
/**
|
||||
* @param depth
|
||||
* The node's depth starting from the automaton root. Needed for
|
||||
* LUCENE-2934 (node expansion based on conditions other than the
|
||||
* fanout size).
|
||||
*/
|
||||
@SuppressWarnings("unchecked")
|
||||
public UnCompiledNode(Builder<T> owner) {
|
||||
public UnCompiledNode(Builder<T> owner, int depth) {
|
||||
this.owner = owner;
|
||||
arcs = (Arc<T>[]) new Arc[1];
|
||||
arcs[0] = new Arc<T>();
|
||||
output = owner.NO_OUTPUT;
|
||||
this.depth = depth;
|
||||
}
|
||||
|
||||
public boolean isCompiled() {
|
||||
|
@ -441,6 +451,9 @@ public class Builder<T> {
|
|||
isFinal = false;
|
||||
output = owner.NO_OUTPUT;
|
||||
inputCount = 0;
|
||||
|
||||
// We don't clear the depth here because it never changes
|
||||
// for nodes on the frontier (even when reused).
|
||||
}
|
||||
|
||||
public T getLastOutput(int labelToMatch) {
|
||||
|
|
|
@ -25,6 +25,7 @@ import org.apache.lucene.store.IndexInput;
|
|||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.CodecUtil;
|
||||
import org.apache.lucene.util.automaton.fst.Builder.UnCompiledNode;
|
||||
|
||||
/** Represents an FST using a compact byte[] format.
|
||||
* <p> The format is similar to what's used by Morfologik
|
||||
|
@ -47,11 +48,21 @@ public class FST<T> {
|
|||
// this when number of arcs is > NUM_ARCS_ARRAY:
|
||||
private final static int BIT_ARCS_AS_FIXED_ARRAY = 1 << 6;
|
||||
|
||||
// If the node has >= this number of arcs, the arcs are
|
||||
// stored as a fixed array. Fixed array consumes more RAM
|
||||
// but enables binary search on the arcs (instead of
|
||||
// linear scan) on lookup by arc label:
|
||||
private final static int NUM_ARCS_FIXED_ARRAY = 10;
|
||||
/**
|
||||
* @see #shouldExpand(UnCompiledNode)
|
||||
*/
|
||||
final static int FIXED_ARRAY_SHALLOW_DISTANCE = 3; // 0 => only root node.
|
||||
|
||||
/**
|
||||
* @see #shouldExpand(UnCompiledNode)
|
||||
*/
|
||||
final static int FIXED_ARRAY_NUM_ARCS_SHALLOW = 5;
|
||||
|
||||
/**
|
||||
* @see #shouldExpand(UnCompiledNode)
|
||||
*/
|
||||
final static int FIXED_ARRAY_NUM_ARCS_DEEP = 10;
|
||||
|
||||
private int[] bytesPerArc = new int[0];
|
||||
|
||||
// Increment version to change it
|
||||
|
@ -315,7 +326,7 @@ public class FST<T> {
|
|||
int startAddress = writer.posWrite;
|
||||
//System.out.println(" startAddr=" + startAddress);
|
||||
|
||||
final boolean doFixedArray = node.numArcs >= NUM_ARCS_FIXED_ARRAY;
|
||||
final boolean doFixedArray = shouldExpand(node);
|
||||
final int fixedArrayStart;
|
||||
if (doFixedArray) {
|
||||
if (bytesPerArc.length < node.numArcs) {
|
||||
|
@ -518,6 +529,23 @@ public class FST<T> {
|
|||
return readNextArc(arc);
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks if <code>arc</code>'s target state is in expanded (or vector) format.
|
||||
*
|
||||
* @return Returns <code>true</code> if <code>arc</code> is not final and points to a state
|
||||
* in an expanded array format; always <code>false</code> for a final arc.
|
||||
*/
|
||||
boolean isExpandedTarget(Arc<T> follow) throws IOException {
|
||||
if (follow.isFinal()) {
|
||||
return false;
|
||||
} else {
|
||||
final BytesReader in = getBytesReader(follow.target);
|
||||
final byte b = in.readByte();
|
||||
|
||||
return (b & BIT_ARCS_AS_FIXED_ARRAY) != 0;
|
||||
}
|
||||
}
|
||||
|
||||
/** In-place read; returns the arc. */
|
||||
public Arc<T> readNextArc(Arc<T> arc) throws IOException {
|
||||
if (arc.label == -1) {
|
||||
|
@ -712,6 +740,26 @@ public class FST<T> {
|
|||
public int getArcWithOutputCount() {
|
||||
return arcWithOutputCount;
|
||||
}
|
||||
|
||||
/**
|
||||
* Nodes will be expanded if their depth (distance from the root node) is
|
||||
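* <= {@link #FIXED_ARRAY_SHALLOW_DISTANCE} and their number of arcs is >=
|
||||
* {@link #FIXED_ARRAY_NUM_ARCS_SHALLOW}, or if they have >= {@link #FIXED_ARRAY_NUM_ARCS_DEEP} arcs at any depth.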
|
||||
*
|
||||
* <p>
|
||||
* Fixed array consumes more RAM but enables binary search on the arcs
|
||||
* (instead of a linear scan) on lookup by arc label.
|
||||
*
|
||||
* @return <code>true</code> if <code>node</code> should be stored in an
|
||||
* expanded (array) form.
|
||||
*
|
||||
* @see #FIXED_ARRAY_NUM_ARCS_DEEP
|
||||
* @see Builder.UnCompiledNode#depth
|
||||
*/
|
||||
private boolean shouldExpand(UnCompiledNode<T> node) {
|
||||
return (node.depth <= FIXED_ARRAY_SHALLOW_DISTANCE && node.numArcs >= FIXED_ARRAY_NUM_ARCS_SHALLOW) ||
|
||||
node.numArcs >= FIXED_ARRAY_NUM_ARCS_DEEP;
|
||||
}
|
||||
|
||||
// Non-static: writes to FST's byte[]
|
||||
class BytesWriter extends DataOutput {
|
||||
|
|
|
@ -189,6 +189,8 @@ public final class Util {
|
|||
*/
|
||||
public static <T> void toDot(FST<T> fst, Writer out, boolean sameRank, boolean labelStates)
|
||||
throws IOException {
|
||||
final String expandedNodeColor = "blue";
|
||||
|
||||
// This is the start arc in the automaton (from the epsilon state to the first state
|
||||
// with outgoing transitions).
|
||||
final FST.Arc<T> startArc = fst.getFirstArc(new FST.Arc<T>());
|
||||
|
@ -219,7 +221,9 @@ public final class Util {
|
|||
}
|
||||
|
||||
emitDotState(out, "initial", "point", "white", "");
|
||||
emitDotState(out, Integer.toString(startArc.target), stateShape, null, "");
|
||||
emitDotState(out, Integer.toString(startArc.target), stateShape,
|
||||
fst.isExpandedTarget(startArc) ? expandedNodeColor : null,
|
||||
"");
|
||||
out.write(" initial -> " + startArc.target + "\n");
|
||||
|
||||
final T NO_OUTPUT = fst.outputs.getNoOutput();
|
||||
|
@ -243,7 +247,9 @@ public final class Util {
|
|||
while (true) {
|
||||
// Emit the unseen state and add it to the queue for the next level.
|
||||
if (arc.target >= 0 && !seen.get(arc.target)) {
|
||||
emitDotState(out, Integer.toString(arc.target), stateShape, null,
|
||||
final boolean isExpanded = fst.isExpandedTarget(arc);
|
||||
emitDotState(out, Integer.toString(arc.target), stateShape,
|
||||
isExpanded ? expandedNodeColor : null,
|
||||
labelStates ? Integer.toString(arc.target) : "");
|
||||
seen.set(arc.target);
|
||||
nextLevelQueue.add(new FST.Arc<T>().copyFrom(arc));
|
||||
|
@ -285,10 +291,10 @@ public final class Util {
|
|||
}
|
||||
sameLevelStates.clear();
|
||||
}
|
||||
|
||||
|
||||
// Emit terminating state (always there anyway).
|
||||
out.write(" -1 [style=filled, color=black, shape=circle, label=\"\"]\n\n");
|
||||
out.write(" {rank=sink; -1 } ");
|
||||
out.write(" {rank=sink; -1 }\n");
|
||||
|
||||
out.write("}\n");
|
||||
out.flush();
|
||||
|
|
|
@ -28,8 +28,8 @@ public class TestSmallFloat extends LuceneTestCase {
|
|||
return Float.intBitsToFloat(bits);
|
||||
}
|
||||
|
||||
// original lucene floatToByte
|
||||
static byte orig_floatToByte(float f) {
|
||||
// original lucene floatToByte (since lucene 1.3)
|
||||
static byte orig_floatToByte_v13(float f) {
|
||||
if (f < 0.0f) // round negatives up to zero
|
||||
f = 0.0f;
|
||||
|
||||
|
@ -53,6 +53,33 @@ public class TestSmallFloat extends LuceneTestCase {
|
|||
return (byte)((exponent << 3) | mantissa); // pack into a byte
|
||||
}
|
||||
|
||||
// This is the original lucene floatToByte (from v1.3)
|
||||
// except with the underflow detection bug fixed for values like 5.8123817E-10f
|
||||
static byte orig_floatToByte(float f) {
|
||||
if (f < 0.0f) // round negatives up to zero
|
||||
f = 0.0f;
|
||||
|
||||
if (f == 0.0f) // zero is a special case
|
||||
return 0;
|
||||
|
||||
int bits = Float.floatToIntBits(f); // parse float into parts
|
||||
int mantissa = (bits & 0xffffff) >> 21;
|
||||
int exponent = (((bits >> 24) & 0x7f) - 63) + 15;
|
||||
|
||||
if (exponent > 31) { // overflow: use max value
|
||||
exponent = 31;
|
||||
mantissa = 7;
|
||||
}
|
||||
|
||||
if (exponent < 0 || exponent == 0 && mantissa == 0) { // underflow: use min value
|
||||
exponent = 0;
|
||||
mantissa = 1;
|
||||
}
|
||||
|
||||
return (byte)((exponent << 3) | mantissa); // pack into a byte
|
||||
}
|
||||
|
||||
|
||||
public void testByteToFloat() {
|
||||
for (int i=0; i<256; i++) {
|
||||
float f1 = orig_byteToFloat((byte)i);
|
||||
|
@ -68,6 +95,22 @@ public class TestSmallFloat extends LuceneTestCase {
|
|||
}
|
||||
|
||||
public void testFloatToByte() {
|
||||
assertEquals(0, orig_floatToByte_v13(5.8123817E-10f)); // verify the old bug (see LUCENE-2937)
|
||||
assertEquals(1, orig_floatToByte(5.8123817E-10f)); // verify it's fixed in this test code
|
||||
assertEquals(1, SmallFloat.floatToByte315(5.8123817E-10f)); // verify it's fixed
|
||||
|
||||
// test some constants
|
||||
assertEquals(0, SmallFloat.floatToByte315(0));
|
||||
assertEquals(1, SmallFloat.floatToByte315(Float.MIN_VALUE)); // underflow rounds up to smallest positive
|
||||
assertEquals(255, SmallFloat.floatToByte315(Float.MAX_VALUE) & 0xff); // overflow rounds down to largest positive
|
||||
assertEquals(255, SmallFloat.floatToByte315(Float.POSITIVE_INFINITY) & 0xff);
|
||||
|
||||
// all negatives map to 0
|
||||
assertEquals(0, SmallFloat.floatToByte315(-Float.MIN_VALUE));
|
||||
assertEquals(0, SmallFloat.floatToByte315(-Float.MAX_VALUE));
|
||||
assertEquals(0, SmallFloat.floatToByte315(Float.NEGATIVE_INFINITY));
|
||||
|
||||
|
||||
// up iterations for more exhaustive test after changing something
|
||||
int num = 100000 * RANDOM_MULTIPLIER;
|
||||
for (int i = 0; i < num; i++) {
|
||||
|
@ -95,8 +138,8 @@ public class TestSmallFloat extends LuceneTestCase {
|
|||
if (f==f) { // skip non-numbers
|
||||
byte b1 = orig_floatToByte(f);
|
||||
byte b2 = SmallFloat.floatToByte315(f);
|
||||
if (b1!=b2) {
|
||||
TestCase.fail("Failed floatToByte315 for float " + f);
|
||||
if (b1!=b2 || b2==0 && f>0) {
|
||||
fail("Failed floatToByte315 for float " + f + " source bits="+Integer.toHexString(i) + " float raw bits=" + Integer.toHexString(Float.floatToRawIntBits(i)));
|
||||
}
|
||||
}
|
||||
if (i==Integer.MAX_VALUE) break;
|
||||
|
|
|
@ -56,6 +56,7 @@ import org.apache.lucene.util.LineFileDocs;
|
|||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.UnicodeUtil;
|
||||
import org.apache.lucene.util._TestUtil;
|
||||
import org.apache.lucene.util.automaton.fst.FST.Arc;
|
||||
|
||||
public class TestFSTs extends LuceneTestCase {
|
||||
|
||||
|
@ -1322,4 +1323,85 @@ public class TestFSTs extends LuceneTestCase {
|
|||
assertEquals(b, seekResult.input);
|
||||
assertEquals(42, (long) seekResult.output);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test state expansion (array format) on close-to-root states. Creates
|
||||
* synthetic input that has one expanded state on each level.
|
||||
*
|
||||
* @see "https://issues.apache.org/jira/browse/LUCENE-2933"
|
||||
*/
|
||||
public void testExpandedCloseToRoot() throws Exception {
|
||||
class SyntheticData {
|
||||
FST<Object> compile(String[] lines) throws IOException {
|
||||
final NoOutputs outputs = NoOutputs.getSingleton();
|
||||
final Object nothing = outputs.getNoOutput();
|
||||
final Builder<Object> b = new Builder<Object>(FST.INPUT_TYPE.BYTE1, 0, 0, true, outputs);
|
||||
|
||||
int line = 0;
|
||||
final BytesRef term = new BytesRef();
|
||||
while (line < lines.length) {
|
||||
String w = lines[line++];
|
||||
if (w == null) {
|
||||
break;
|
||||
}
|
||||
term.copy(w);
|
||||
b.add(term, nothing);
|
||||
}
|
||||
|
||||
return b.finish();
|
||||
}
|
||||
|
||||
void generate(ArrayList<String> out, StringBuilder b, char from, char to,
|
||||
int depth) {
|
||||
if (depth == 0 || from == to) {
|
||||
String seq = b.toString() + "_" + out.size() + "_end";
|
||||
out.add(seq);
|
||||
} else {
|
||||
for (char c = from; c <= to; c++) {
|
||||
b.append(c);
|
||||
generate(out, b, from, c == to ? to : from, depth - 1);
|
||||
b.deleteCharAt(b.length() - 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public int verifyStateAndBelow(FST<Object> fst, Arc<Object> arc, int depth)
|
||||
throws IOException {
|
||||
if (fst.targetHasArcs(arc)) {
|
||||
int childCount = 0;
|
||||
for (arc = fst.readFirstTargetArc(arc, arc);;
|
||||
arc = fst.readNextArc(arc), childCount++)
|
||||
{
|
||||
boolean expanded = fst.isExpandedTarget(arc);
|
||||
int children = verifyStateAndBelow(fst, new FST.Arc<Object>().copyFrom(arc), depth + 1);
|
||||
|
||||
assertEquals(
|
||||
expanded,
|
||||
(depth <= FST.FIXED_ARRAY_SHALLOW_DISTANCE &&
|
||||
children >= FST.FIXED_ARRAY_NUM_ARCS_SHALLOW) ||
|
||||
children >= FST.FIXED_ARRAY_NUM_ARCS_DEEP);
|
||||
if (arc.isLast()) break;
|
||||
}
|
||||
|
||||
return childCount;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
// Sanity check.
|
||||
assertTrue(FST.FIXED_ARRAY_NUM_ARCS_SHALLOW < FST.FIXED_ARRAY_NUM_ARCS_DEEP);
|
||||
assertTrue(FST.FIXED_ARRAY_SHALLOW_DISTANCE >= 0);
|
||||
|
||||
SyntheticData s = new SyntheticData();
|
||||
|
||||
ArrayList<String> out = new ArrayList<String>();
|
||||
StringBuilder b = new StringBuilder();
|
||||
s.generate(out, b, 'a', 'i', 10);
|
||||
String[] input = out.toArray(new String[out.size()]);
|
||||
Arrays.sort(input);
|
||||
FST<Object> fst = s.compile(input);
|
||||
FST.Arc<Object> arc = fst.getFirstArc(new FST.Arc<Object>());
|
||||
s.verifyStateAndBelow(fst, arc, 1);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -21,7 +21,17 @@ package org.apache.solr.analysis;
|
|||
import org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
|
||||
/** Factory for {@link ASCIIFoldingFilter} */
|
||||
/**
|
||||
* Factory for {@link ASCIIFoldingFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_ascii" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <filter class="solr.ASCIIFoldingFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
* @version $Id$
|
||||
*/
|
||||
public class ASCIIFoldingFilterFactory extends BaseTokenFilterFactory {
|
||||
public ASCIIFoldingFilter create(TokenStream input) {
|
||||
return new ASCIIFoldingFilter(input);
|
||||
|
|
|
@ -21,8 +21,16 @@ import org.apache.lucene.analysis.ar.ArabicNormalizationFilter;
|
|||
|
||||
|
||||
/**
|
||||
* Factory for {@link ArabicNormalizationFilter}
|
||||
**/
|
||||
* Factory for {@link ArabicNormalizationFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_arnormal" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <filter class="solr.ArabicNormalizationFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
* @version $Id$
|
||||
*/
|
||||
public class ArabicNormalizationFilterFactory extends BaseTokenFilterFactory{
|
||||
|
||||
public ArabicNormalizationFilter create(TokenStream input) {
|
||||
|
|
|
@ -21,8 +21,16 @@ import org.apache.lucene.analysis.ar.ArabicStemFilter;
|
|||
|
||||
|
||||
/**
|
||||
* Factory for {@link ArabicStemFilter}
|
||||
**/
|
||||
* Factory for {@link ArabicStemFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_arstem" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <filter class="solr.ArabicStemFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
* @version $Id$
|
||||
*/
|
||||
public class ArabicStemFilterFactory extends BaseTokenFilterFactory{
|
||||
|
||||
|
||||
|
|
|
@ -21,7 +21,17 @@ package org.apache.solr.analysis;
|
|||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.br.BrazilianStemFilter;
|
||||
|
||||
/** Factory for {@link BrazilianStemFilter} */
|
||||
/**
|
||||
* Factory for {@link BrazilianStemFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_brstem" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <filter class="solr.BrazilianStemFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
* @version $Id$
|
||||
*/
|
||||
public class BrazilianStemFilterFactory extends BaseTokenFilterFactory {
|
||||
public BrazilianStemFilter create(TokenStream in) {
|
||||
return new BrazilianStemFilter(in);
|
||||
|
|
|
@ -20,7 +20,17 @@ package org.apache.solr.analysis;
|
|||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.bg.BulgarianStemFilter;
|
||||
|
||||
/** Factory for {@link BulgarianStemFilter} */
|
||||
/**
|
||||
* Factory for {@link BulgarianStemFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_bgstem" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <filter class="solr.BulgarianStemFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
* @version $Id$
|
||||
*/
|
||||
public class BulgarianStemFilterFactory extends BaseTokenFilterFactory {
|
||||
public TokenStream create(TokenStream input) {
|
||||
return new BulgarianStemFilter(input);
|
||||
|
|
|
@ -22,7 +22,16 @@ package org.apache.solr.analysis;
|
|||
import org.apache.lucene.analysis.cjk.CJKTokenizer;
|
||||
import java.io.Reader;
|
||||
|
||||
/** Factory for {@link CJKTokenizer} */
|
||||
/**
|
||||
* Factory for {@link CJKTokenizer}.
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_cjk" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.CJKTokenizerFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
* @version $Id$
|
||||
*/
|
||||
public class CJKTokenizerFactory extends BaseTokenizerFactory {
|
||||
public CJKTokenizer create(Reader in) {
|
||||
return new CJKTokenizer(in);
|
||||
|
|
|
@ -33,7 +33,7 @@ import java.util.StringTokenizer;
|
|||
* The factory takes parameters:<br/>
|
||||
* "onlyFirstWord" - should each word be capitalized or all of the words?<br/>
|
||||
* "keep" - a keep word list. Each word that should be kept separated by whitespace.<br/>
|
||||
* "keepIgnoreCase - true or false. If true, the keep list will be considered case-insensitive.
|
||||
* "keepIgnoreCase" - true or false. If true, the keep list will be considered case-insensitive.<br/>
|
||||
* "forceFirstLetter" - Force the first letter to be capitalized even if it is in the keep list<br/>
|
||||
* "okPrefix" - do not change word capitalization if a word begins with something in this list.
|
||||
* for example if "McK" is on the okPrefix list, the word "McKinley" should not be changed to
|
||||
|
@ -43,6 +43,16 @@ import java.util.StringTokenizer;
|
|||
* "maxWordCount" - if the token contains more than maxWordCount words, the capitalization is
|
||||
* assumed to be correct.<br/>
|
||||
*
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_cptlztn" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <filter class="solr.CapitalizationFilterFactory" onlyFirstWord="true"
|
||||
* keep="java solr lucene" keepIgnoreCase="false"
|
||||
* okPrefix="McK McD McA"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
* @version $Id$
|
||||
* @since solr 1.3
|
||||
*/
|
||||
|
|
|
@ -22,6 +22,15 @@ import org.apache.lucene.analysis.TokenStream;
|
|||
import org.apache.lucene.analysis.standard.ClassicFilter;
|
||||
|
||||
/**
|
||||
* Factory for {@link ClassicFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_clssc" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <filter class="solr.ClassicFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
* @version $Id$
|
||||
*/
|
||||
public class ClassicFilterFactory extends BaseTokenFilterFactory {
|
||||
|
|
|
@ -24,6 +24,14 @@ import java.io.Reader;
|
|||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Factory for {@link ClassicTokenizer}.
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_clssc" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.ClassicTokenizerFactory" maxTokenLength="120"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
* @version $Id$
|
||||
*/
|
||||
|
||||
|
|
|
@ -56,11 +56,19 @@ import org.apache.solr.util.plugin.ResourceLoaderAware;
|
|||
* <li>strength: 'primary','secondary','tertiary', or 'identical' (optional)
|
||||
* <li>decomposition: 'no','canonical', or 'full' (optional)
|
||||
* </ul>
|
||||
*
|
||||
*
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_clltnky" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <filter class="solr.CollationKeyFilterFactory" language="ja" country="JP"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
* @see Collator
|
||||
* @see Locale
|
||||
* @see RuleBasedCollator
|
||||
* @since solr 1.5
|
||||
* @since solr 3.1
|
||||
*/
|
||||
public class CollationKeyFilterFactory extends BaseTokenFilterFactory implements ResourceLoaderAware {
|
||||
private Collator collator;
|
||||
|
|
|
@ -27,7 +27,15 @@ import org.apache.solr.common.ResourceLoader;
|
|||
import org.apache.solr.util.plugin.ResourceLoaderAware;
|
||||
|
||||
/**
|
||||
* Constructs a CommonGramsFilter
|
||||
* Constructs a {@link CommonGramsFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_cmmngrms" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <filter class="solr.CommonGramsFilterFactory" words="commongramsstopwords.txt" ignoreCase="false"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
* @version $Id$
|
||||
*/
|
||||
|
||||
/*
|
||||
|
|
|
@ -29,10 +29,18 @@ import org.apache.solr.common.ResourceLoader;
|
|||
import org.apache.solr.util.plugin.ResourceLoaderAware;
|
||||
|
||||
/**
|
||||
* Construct CommonGramsQueryFilter
|
||||
* Construct {@link CommonGramsQueryFilter}.
|
||||
*
|
||||
* This is pretty close to a straight copy from StopFilterFactory
|
||||
* This is pretty close to a straight copy from {@link StopFilterFactory}.
|
||||
*
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_cmmngrmsqry" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <filter class="solr.CommonGramsQueryFilterFactory" words="commongramsquerystopwords.txt" ignoreCase="false"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
* @version $Id$
|
||||
*/
|
||||
public class CommonGramsQueryFilterFactory extends BaseTokenFilterFactory
|
||||
implements ResourceLoaderAware {
|
||||
|
|
|
@ -20,7 +20,16 @@ package org.apache.solr.analysis;
|
|||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.cz.CzechStemFilter;
|
||||
|
||||
/** Factory for {@link CzechStemFilter} */
|
||||
/**
|
||||
* Factory for {@link CzechStemFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_czstem" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <filter class="solr.CzechStemFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*/
|
||||
public class CzechStemFilterFactory extends BaseTokenFilterFactory {
|
||||
public TokenStream create(TokenStream input) {
|
||||
return new CzechStemFilter(input);
|
||||
|
|
|
@ -31,8 +31,17 @@ import java.util.Map;
|
|||
|
||||
/**
|
||||
*
|
||||
* Factory for {@link DelimitedPayloadTokenFilter}
|
||||
**/
|
||||
* Factory for {@link DelimitedPayloadTokenFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_dlmtd" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <filter class="solr.DelimitedPayloadTokenFilterFactory" encoder="float" delimiter="|"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
* @version $Id$
|
||||
*
|
||||
*/
|
||||
public class DelimitedPayloadTokenFilterFactory extends BaseTokenFilterFactory implements ResourceLoaderAware {
|
||||
public static final String ENCODER_ATTR = "encoder";
|
||||
public static final String DELIMITER_ATTR = "delimiter";
|
||||
|
|
|
@ -28,7 +28,18 @@ import org.apache.lucene.analysis.TokenStream;
|
|||
import java.util.Map;
|
||||
import java.io.IOException;
|
||||
|
||||
/** Factory for {@link DictionaryCompoundWordTokenFilter} */
|
||||
/**
|
||||
* Factory for {@link DictionaryCompoundWordTokenFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_dictcomp" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <filter class="solr.DictionaryCompoundWordTokenFilterFactory" dictionary="dictionary.txt"
|
||||
* minWordSize="5" minSubwordSize="2" maxSubwordSize="15" onlyLongestMatch="true"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
* @version $Id$
|
||||
*/
|
||||
public class DictionaryCompoundWordTokenFilterFactory extends BaseTokenFilterFactory implements ResourceLoaderAware {
|
||||
private CharArraySet dictionary;
|
||||
private String dictFile;
|
||||
|
|
|
@ -21,6 +21,17 @@ import java.util.Map;
|
|||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.phonetic.DoubleMetaphoneFilter;
|
||||
|
||||
/**
|
||||
* Factory for {@link DoubleMetaphoneFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_dblmtphn" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <filter class="solr.DoubleMetaphoneFilterFactory" inject="true" maxCodeLength="4"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
* @version $Id$
|
||||
*/
|
||||
public class DoubleMetaphoneFilterFactory extends BaseTokenFilterFactory
|
||||
{
|
||||
public static final String INJECT = "inject";
|
||||
|
|
|
@ -23,6 +23,14 @@ import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter;
|
|||
|
||||
/**
|
||||
* Creates new instances of {@link EdgeNGramTokenFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_edgngrm" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <filter class="solr.EdgeNGramFilterFactory" side="front" minGramSize="1" maxGramSize="1"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
* @version $Id$
|
||||
*/
|
||||
public class EdgeNGramFilterFactory extends BaseTokenFilterFactory {
|
||||
private int maxGramSize = 0;
|
||||
|
|
|
@ -24,6 +24,13 @@ import java.util.Map;
|
|||
|
||||
/**
|
||||
* Creates new instances of {@link EdgeNGramTokenizer}.
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_edgngrm" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.EdgeNGramTokenizerFactory" side="front" minGramSize="1" maxGramSize="1"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
* @version $Id$
|
||||
*/
|
||||
public class EdgeNGramTokenizerFactory extends BaseTokenizerFactory {
|
||||
private int maxGramSize = 0;
|
||||
|
|
|
@ -27,7 +27,17 @@ import org.apache.lucene.analysis.util.CharArraySet;
|
|||
import java.io.IOException;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
|
||||
/** Factory for {@link ElisionFilter} */
|
||||
/**
|
||||
* Factory for {@link ElisionFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_elsn" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <filter class="solr.ElisionFilterFactory" articles="stopwordarticles.txt"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
* @version $Id$
|
||||
*/
|
||||
public class ElisionFilterFactory extends BaseTokenFilterFactory implements ResourceLoaderAware {
|
||||
|
||||
private CharArraySet articles;
|
||||
|
|
|
@ -20,7 +20,17 @@ package org.apache.solr.analysis;
|
|||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.en.EnglishMinimalStemFilter;
|
||||
|
||||
/** Factory for {@link EnglishMinimalStemFilter} */
|
||||
/**
|
||||
* Factory for {@link EnglishMinimalStemFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_enminstem" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <filter class="solr.EnglishMinimalStemFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
* @version $Id$
|
||||
*/
|
||||
public class EnglishMinimalStemFilterFactory extends BaseTokenFilterFactory {
|
||||
public TokenStream create(TokenStream input) {
|
||||
return new EnglishMinimalStemFilter(input);
|
||||
|
|
|
@ -20,7 +20,17 @@ package org.apache.solr.analysis;
|
|||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.en.EnglishPossessiveFilter;
|
||||
|
||||
/** Factory for {@link EnglishPossessiveFilter} */
|
||||
/**
|
||||
* Factory for {@link EnglishPossessiveFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_enpossessive" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <filter class="solr.EnglishPossessiveFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
* @version $Id$
|
||||
*/
|
||||
public class EnglishPossessiveFilterFactory extends BaseTokenFilterFactory {
|
||||
public TokenStream create(TokenStream input) {
|
||||
return new EnglishPossessiveFilter(input);
|
||||
|
|
|
@ -20,7 +20,17 @@ package org.apache.solr.analysis;
|
|||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.fi.FinnishLightStemFilter;
|
||||
|
||||
/** Factory for {@link FinnishLightStemFilter} */
|
||||
/**
|
||||
* Factory for {@link FinnishLightStemFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_filgtstem" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <filter class="solr.FinnishLightStemFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
* @version $Id$
|
||||
*/
|
||||
public class FinnishLightStemFilterFactory extends BaseTokenFilterFactory {
|
||||
public TokenStream create(TokenStream input) {
|
||||
return new FinnishLightStemFilter(input);
|
||||
|
|
|
@ -20,7 +20,17 @@ package org.apache.solr.analysis;
|
|||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.fr.FrenchLightStemFilter;
|
||||
|
||||
/** Factory for {@link FrenchLightStemFilter} */
|
||||
/**
|
||||
* Factory for {@link FrenchLightStemFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_frlgtstem" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <filter class="solr.FrenchLightStemFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
* @version $Id$
|
||||
*/
|
||||
public class FrenchLightStemFilterFactory extends BaseTokenFilterFactory {
|
||||
public TokenStream create(TokenStream input) {
|
||||
return new FrenchLightStemFilter(input);
|
||||
|
|
|
@ -20,7 +20,17 @@ package org.apache.solr.analysis;
|
|||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.fr.FrenchMinimalStemFilter;
|
||||
|
||||
/** Factory for {@link FrenchMinimalStemFilter} */
|
||||
/**
|
||||
* Factory for {@link FrenchMinimalStemFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_frminstem" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <filter class="solr.FrenchMinimalStemFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
* @version $Id$
|
||||
*/
|
||||
public class FrenchMinimalStemFilterFactory extends BaseTokenFilterFactory {
|
||||
public TokenStream create(TokenStream input) {
|
||||
return new FrenchMinimalStemFilter(input);
|
||||
|
|
|
@ -20,7 +20,17 @@ package org.apache.solr.analysis;
|
|||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.gl.GalicianStemFilter;
|
||||
|
||||
/** Factory for {@link GalicianStemFilter} */
|
||||
/**
|
||||
* Factory for {@link GalicianStemFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_glstem" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <filter class="solr.GalicianStemFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
* @version $Id$
|
||||
*/
|
||||
public class GalicianStemFilterFactory extends BaseTokenFilterFactory {
|
||||
public TokenStream create(TokenStream input) {
|
||||
return new GalicianStemFilter(input);
|
||||
|
|
|
@ -20,7 +20,17 @@ package org.apache.solr.analysis;
|
|||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.de.GermanLightStemFilter;
|
||||
|
||||
/** Factory for {@link GermanLightStemFilter} */
|
||||
/**
|
||||
* Factory for {@link GermanLightStemFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_delgtstem" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <filter class="solr.GermanLightStemFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
* @version $Id$
|
||||
*/
|
||||
public class GermanLightStemFilterFactory extends BaseTokenFilterFactory {
|
||||
public TokenStream create(TokenStream input) {
|
||||
return new GermanLightStemFilter(input);
|
||||
|
|
|
@ -20,7 +20,17 @@ package org.apache.solr.analysis;
|
|||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.de.GermanMinimalStemFilter;
|
||||
|
||||
/** Factory for {@link GermanMinimalStemFilter} */
|
||||
/**
|
||||
* Factory for {@link GermanMinimalStemFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_deminstem" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <filter class="solr.GermanMinimalStemFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
* @version $Id$
|
||||
*/
|
||||
public class GermanMinimalStemFilterFactory extends BaseTokenFilterFactory {
|
||||
public TokenStream create(TokenStream input) {
|
||||
return new GermanMinimalStemFilter(input);
|
||||
|
|
|
@ -22,7 +22,17 @@ package org.apache.solr.analysis;
|
|||
import org.apache.lucene.analysis.de.GermanStemFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
|
||||
/** Factory for {@link GermanStemFilter} */
|
||||
/**
|
||||
* Factory for {@link GermanStemFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_destem" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <filter class="solr.GermanStemFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
* @version $Id$
|
||||
*/
|
||||
public class GermanStemFilterFactory extends BaseTokenFilterFactory {
|
||||
public GermanStemFilter create(TokenStream in) {
|
||||
return new GermanStemFilter(in);
|
||||
|
|
|
@ -26,7 +26,17 @@ import org.apache.lucene.analysis.el.GreekLowerCaseFilter;
|
|||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.SolrException.ErrorCode;
|
||||
|
||||
/** Factory for {@link GreekLowerCaseFilter} */
|
||||
/**
|
||||
* Factory for {@link GreekLowerCaseFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_glc" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <filter class="solr.GreekLowerCaseFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
* @version $Id$
|
||||
*/
|
||||
public class GreekLowerCaseFilterFactory extends BaseTokenFilterFactory
|
||||
{
|
||||
|
||||
|
|
|
@ -20,7 +20,17 @@ package org.apache.solr.analysis;
|
|||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.el.GreekStemFilter;
|
||||
|
||||
/** Factory for {@link GreekStemFilter} */
|
||||
/**
|
||||
* Factory for {@link GreekStemFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_gstem" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <filter class="solr.GreekStemFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
* @version $Id$
|
||||
*/
|
||||
public class GreekStemFilterFactory extends BaseTokenFilterFactory {
|
||||
|
||||
public TokenStream create(TokenStream input) {
|
||||
|
|
|
@ -21,7 +21,18 @@ package org.apache.solr.analysis;
|
|||
import org.apache.lucene.analysis.CharStream;
|
||||
import org.apache.lucene.analysis.charfilter.HTMLStripCharFilter;
|
||||
|
||||
public class HTMLStripCharFilterFactory extends BaseCharFilterFactory {
|
||||
/**
|
||||
* Factory for {@link HTMLStripCharFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_html" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <charFilter class="solr.HTMLStripCharFilterFactory"/>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
* @version $Id$
|
||||
*/
|
||||
public class HTMLStripCharFilterFactory extends BaseCharFilterFactory {
|
||||
|
||||
public HTMLStripCharFilter create(CharStream input) {
|
||||
return new HTMLStripCharFilter(input);
|
||||
|
|
|
@ -20,7 +20,17 @@ package org.apache.solr.analysis;
|
|||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.hi.HindiNormalizationFilter;
|
||||
|
||||
/** Factory for {@link HindiNormalizationFilter} */
|
||||
/**
|
||||
* Factory for {@link HindiNormalizationFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_hinormal" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <filter class="solr.HindiNormalizationFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
* @version $Id$
|
||||
*/
|
||||
public class HindiNormalizationFilterFactory extends BaseTokenFilterFactory {
|
||||
public TokenStream create(TokenStream input) {
|
||||
return new HindiNormalizationFilter(input);
|
||||
|
|
|
@ -20,7 +20,17 @@ package org.apache.solr.analysis;
|
|||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.hi.HindiStemFilter;
|
||||
|
||||
/** Factory for {@link HindiStemFilter} */
|
||||
/**
|
||||
* Factory for {@link HindiStemFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_histem" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <filter class="solr.HindiStemFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
* @version $Id$
|
||||
*/
|
||||
public class HindiStemFilterFactory extends BaseTokenFilterFactory {
|
||||
public TokenStream create(TokenStream input) {
|
||||
return new HindiStemFilter(input);
|
||||
|
|
|
@ -20,7 +20,17 @@ package org.apache.solr.analysis;
|
|||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.hu.HungarianLightStemFilter;
|
||||
|
||||
/** Factory for {@link HungarianLightStemFilter} */
|
||||
/**
|
||||
* Factory for {@link HungarianLightStemFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_hulgtstem" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <filter class="solr.HungarianLightStemFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
* @version $Id$
|
||||
*/
|
||||
public class HungarianLightStemFilterFactory extends BaseTokenFilterFactory {
|
||||
public TokenStream create(TokenStream input) {
|
||||
return new HungarianLightStemFilter(input);
|
||||
|
|
|
@ -22,7 +22,15 @@ import org.apache.lucene.analysis.miscellaneous.HyphenatedWordsFilter;
|
|||
import org.apache.solr.analysis.BaseTokenFilterFactory;
|
||||
|
||||
/**
|
||||
* Factory for {@link HyphenatedWordsFilter}
|
||||
* Factory for {@link HyphenatedWordsFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_hyphn" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <filter class="solr.HyphenatedWordsFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
* @version $Id$
|
||||
*/
|
||||
public class HyphenatedWordsFilterFactory extends BaseTokenFilterFactory {
|
||||
public HyphenatedWordsFilter create(TokenStream input) {
|
||||
|
|
|
@ -33,7 +33,7 @@ import java.io.InputStream;
|
|||
import org.xml.sax.InputSource;
|
||||
|
||||
/**
|
||||
* Factory for {@link HyphenationCompoundWordTokenFilter}
|
||||
* Factory for {@link HyphenationCompoundWordTokenFilter}.
|
||||
* <p>
|
||||
* This factory accepts the following parameters:
|
||||
* <ul>
|
||||
|
@ -48,6 +48,15 @@ import org.xml.sax.InputSource;
|
|||
* to the stream. defaults to false.
|
||||
* </ul>
|
||||
* <p>
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_hyphncomp" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <filter class="solr.HyphenationCompoundWordTokenFilterFactory" hyphenator="hyphenator.xml" encoding="UTF-8"
|
||||
* dictionary="dictionary.txt" minWordSize="5" minSubwordSize="2" maxSubwordSize="15" onlyLongestMatch="false"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
* @version $Id$
|
||||
* @see HyphenationCompoundWordTokenFilter
|
||||
*/
|
||||
public class HyphenationCompoundWordTokenFilterFactory extends BaseTokenFilterFactory implements ResourceLoaderAware {
|
||||
|
|
|
@ -20,7 +20,17 @@ package org.apache.solr.analysis;
|
|||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.in.IndicNormalizationFilter;
|
||||
|
||||
/** Factory for {@link IndicNormalizationFilter} */
|
||||
/**
|
||||
* Factory for {@link IndicNormalizationFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_innormal" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <filter class="solr.IndicNormalizationFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
* @version $Id$
|
||||
*/
|
||||
public class IndicNormalizationFilterFactory extends BaseTokenFilterFactory {
|
||||
public TokenStream create(TokenStream input) {
|
||||
return new IndicNormalizationFilter(input);
|
||||
|
|
|
@ -22,7 +22,17 @@ import java.util.Map;
|
|||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.id.IndonesianStemFilter;
|
||||
|
||||
/** Factory for {@link IndonesianStemFilter} */
|
||||
/**
|
||||
* Factory for {@link IndonesianStemFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_idstem" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <filter class="solr.IndonesianStemFilterFactory" stemDerivational="true"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
* @version $Id$
|
||||
*/
|
||||
public class IndonesianStemFilterFactory extends BaseTokenFilterFactory {
|
||||
private boolean stemDerivational = true;
|
||||
|
||||
|
|
|
@ -20,7 +20,17 @@ package org.apache.solr.analysis;
|
|||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.it.ItalianLightStemFilter;
|
||||
|
||||
/** Factory for {@link ItalianLightStemFilter} */
|
||||
/**
|
||||
* Factory for {@link ItalianLightStemFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_itlgtstem" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <filter class="solr.ItalianLightStemFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
* @version $Id$
|
||||
*/
|
||||
public class ItalianLightStemFilterFactory extends BaseTokenFilterFactory {
|
||||
public TokenStream create(TokenStream input) {
|
||||
return new ItalianLightStemFilter(input);
|
||||
|
|
|
@ -28,6 +28,14 @@ import java.util.Set;
|
|||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* Factory for {@link KeepWordFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_keepword" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <filter class="solr.KeepWordFilterFactory" words="keepwords.txt" ignoreCase="false" enablePositionIncrements="false"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
* @version $Id$
|
||||
*/
|
||||
public class KeepWordFilterFactory extends BaseTokenFilterFactory implements ResourceLoaderAware {
|
||||
|
|
|
@ -26,7 +26,15 @@ import org.apache.solr.util.plugin.ResourceLoaderAware;
|
|||
*/
|
||||
|
||||
/**
|
||||
* Factory for {@link KeywordMarkerFilter}
|
||||
* Factory for {@link KeywordMarkerFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_keyword" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <filter class="solr.KeywordMarkerFilterFactory" protected="protectedkeyword.txt" ignoreCase="false"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
* @version $Id$
|
||||
*/
|
||||
public class KeywordMarkerFilterFactory extends BaseTokenFilterFactory implements ResourceLoaderAware {
|
||||
public static final String PROTECTED_TOKENS = "protected";
|
||||
|
|
|
@ -22,6 +22,13 @@ import org.apache.lucene.analysis.core.KeywordTokenizer;
|
|||
import java.io.Reader;
|
||||
|
||||
/**
|
||||
* Factory for {@link KeywordTokenizer}.
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_keyword" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.KeywordTokenizerFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
* @version $Id$
|
||||
*/
|
||||
public class KeywordTokenizerFactory extends BaseTokenizerFactory {
|
||||
|
|
|
@ -23,6 +23,14 @@ import org.apache.lucene.analysis.miscellaneous.LengthFilter;
|
|||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Factory for {@link LengthFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_lngth" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <filter class="solr.LengthFilterFactory" min="0" max="1" enablePositionIncrements="false"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
* @version $Id$
|
||||
*/
|
||||
public class LengthFilterFactory extends BaseTokenFilterFactory {
|
||||
|
|
|
@ -23,6 +23,13 @@ import java.io.Reader;
|
|||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Factory for {@link LetterTokenizer}.
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_letter" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.LetterTokenizerFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
* @version $Id$
|
||||
*/
|
||||
public class LetterTokenizerFactory extends BaseTokenizerFactory {
|
||||
|
|
|
@ -22,6 +22,17 @@ import java.util.Map;
|
|||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.miscellaneous.LimitTokenCountFilter;
|
||||
|
||||
/**
|
||||
* Factory for {@link LimitTokenCountFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_lngthcnt" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <filter class="solr.LimitTokenCountFilterFactory" maxTokenCount="10"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
* @version $Id$
|
||||
*/
|
||||
public class LimitTokenCountFilterFactory extends BaseTokenFilterFactory {
|
||||
|
||||
int maxTokenCount;
|
||||
|
|
|
@ -23,6 +23,14 @@ import org.apache.lucene.analysis.TokenStream;
|
|||
import org.apache.lucene.analysis.core.LowerCaseFilter;
|
||||
|
||||
/**
|
||||
* Factory for {@link LowerCaseFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_lwrcase" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <filter class="solr.LowerCaseFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
* @version $Id$
|
||||
*/
|
||||
public class LowerCaseFilterFactory extends BaseTokenFilterFactory {
|
||||
|
|
|
@ -23,6 +23,13 @@ import java.io.Reader;
|
|||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Factory for {@link LowerCaseTokenizer}.
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_lwrcase" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.LowerCaseTokenizerFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
* @version $Id$
|
||||
*/
|
||||
public class LowerCaseTokenizerFactory extends BaseTokenizerFactory {
|
||||
|
|
|
@ -32,6 +32,14 @@ import org.apache.solr.common.util.StrUtils;
|
|||
import org.apache.solr.util.plugin.ResourceLoaderAware;
|
||||
|
||||
/**
|
||||
* Factory for {@link MappingCharFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_map" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <charFilter class="solr.MappingCharFilterFactory" mapping="mapping.txt"/>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
* @version $Id$
|
||||
* @since Solr 1.4
|
||||
|
|
|
@ -22,7 +22,15 @@ import org.apache.lucene.analysis.TokenStream;
|
|||
import org.apache.lucene.analysis.ngram.NGramTokenFilter;
|
||||
|
||||
/**
|
||||
* Creates new instances of {@link NGramTokenFilter}.
|
||||
* Factory for {@link NGramTokenFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_ngrm" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <filter class="solr.NGramFilterFactory" minGramSize="1" maxGramSize="2"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
* @version $Id$
|
||||
*/
|
||||
public class NGramFilterFactory extends BaseTokenFilterFactory {
|
||||
private int maxGramSize = 0;
|
||||
|
|
|
@ -24,7 +24,14 @@ import java.io.Reader;
|
|||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Creates new instances of {@link NGramTokenizer}.
|
||||
* Factory for {@link NGramTokenizer}.
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_ngrm" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.NGramTokenizerFactory" minGramSize="1" maxGramSize="2"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
* @version $Id$
|
||||
*/
|
||||
public class NGramTokenizerFactory extends BaseTokenizerFactory {
|
||||
private int maxGramSize = 0;
|
||||
|
|
|
@ -23,7 +23,17 @@ import org.apache.lucene.analysis.payloads.NumericPayloadTokenFilter;
|
|||
import org.apache.lucene.analysis.TokenStream;
|
||||
import java.util.Map;
|
||||
|
||||
/** Factory for {@link NumericPayloadTokenFilter} */
|
||||
/**
|
||||
* Factory for {@link NumericPayloadTokenFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_numpayload" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <filter class="solr.NumericPayloadTokenFilterFactory" payload="24" typeMatch="word"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
* @version $Id$
|
||||
*/
|
||||
public class NumericPayloadTokenFilterFactory extends BaseTokenFilterFactory {
|
||||
private float payload;
|
||||
private String typeMatch;
|
||||
|
|
|
@ -26,7 +26,18 @@ import org.apache.solr.common.SolrException.ErrorCode;
|
|||
|
||||
import java.util.Map;
|
||||
|
||||
/** Factory for {@link ShingleFilter} */
|
||||
/**
|
||||
* Factory for {@link ShingleFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_shingle" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <filter class="solr.ShingleFilterFactory" minShingleSize="2" maxShingleSize="2"
|
||||
* outputUnigrams="true" outputUnigramsIfNoShingles="false" tokenSeparator=" "/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
* @version $Id$
|
||||
*/
|
||||
public class ShingleFilterFactory extends BaseTokenFilterFactory {
|
||||
private int minShingleSize;
|
||||
private int maxShingleSize;
|
||||
|
|
|
@ -32,6 +32,13 @@ import org.tartarus.snowball.SnowballProgram;
|
|||
* Factory for {@link SnowballFilter}, with configurable language
|
||||
* <p>
|
||||
* Note: Use of the "Lovins" stemmer is not recommended, as it is implemented with reflection.
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_snowballstem" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <filter class="solr.SnowballPorterFilterFactory" protected="protectedkeyword.txt" language="English"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
* @version $Id$
|
||||
*/
|
||||
|
|
|
@ -20,7 +20,17 @@ package org.apache.solr.analysis;
|
|||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.es.SpanishLightStemFilter;
|
||||
|
||||
/** Factory for {@link SpanishLightStemFilter} */
|
||||
/**
|
||||
* Factory for {@link SpanishLightStemFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_eslgtstem" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <filter class="solr.SpanishLightStemFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
* @version $Id$
|
||||
*/
|
||||
public class SpanishLightStemFilterFactory extends BaseTokenFilterFactory {
|
||||
public TokenStream create(TokenStream input) {
|
||||
return new SpanishLightStemFilter(input);
|
||||
|
|
|
@ -23,6 +23,14 @@ import org.apache.lucene.analysis.TokenStream;
|
|||
import org.apache.lucene.analysis.standard.StandardFilter;
|
||||
|
||||
/**
|
||||
* Factory for {@link StandardFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_stndrd" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <filter class="solr.StandardFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
* @version $Id$
|
||||
*/
|
||||
public class StandardFilterFactory extends BaseTokenFilterFactory {
|
||||
|
|
|
@ -24,6 +24,13 @@ import java.io.Reader;
|
|||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Factory for {@link StandardTokenizer}.
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_stndrd" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.StandardTokenizerFactory" maxTokenLength="255"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
* @version $Id$
|
||||
*/
|
||||
|
||||
|
|
|
@ -28,7 +28,15 @@ import org.apache.solr.common.util.StrUtils;
|
|||
import org.apache.solr.util.plugin.ResourceLoaderAware;
|
||||
|
||||
/**
|
||||
* Factory for {@link StemmerOverrideFilter}
|
||||
* Factory for {@link StemmerOverrideFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_dicstem" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <filter class="solr.StemmerOverrideFilterFactory" dictionary="dictionary.txt" ignoreCase="false"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
* @version $Id$
|
||||
*/
|
||||
public class StemmerOverrideFilterFactory extends BaseTokenFilterFactory implements ResourceLoaderAware {
|
||||
private CharArrayMap<String> dictionary = null;
|
||||
|
|
|
@ -20,7 +20,17 @@ package org.apache.solr.analysis;
|
|||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.sv.SwedishLightStemFilter;
|
||||
|
||||
/** Factory for {@link SwedishLightStemFilter} */
|
||||
/**
|
||||
* Factory for {@link SwedishLightStemFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_svlgtstem" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <filter class="solr.SwedishLightStemFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
* @version $Id$
|
||||
*/
|
||||
public class SwedishLightStemFilterFactory extends BaseTokenFilterFactory {
|
||||
public TokenStream create(TokenStream input) {
|
||||
return new SwedishLightStemFilter(input);
|
||||
|
|
|
@ -35,6 +35,15 @@ import java.util.List;
|
|||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Factory for {@link SynonymFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_synonym" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="false"
|
||||
* expand="true" tokenizerFactory="solr.WhitespaceTokenizerFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
* @version $Id$
|
||||
*/
|
||||
public class SynonymFilterFactory extends BaseTokenFilterFactory implements ResourceLoaderAware {
|
||||
|
|
|
@ -22,7 +22,17 @@ import org.apache.lucene.analysis.th.ThaiWordFilter;
|
|||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
|
||||
/** Factory for {@link ThaiWordFilter} */
|
||||
/**
|
||||
* Factory for {@link ThaiWordFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_thai" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.NGramTokenizerFactory"/>
|
||||
* <filter class="solr.ThaiWordFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
* @version $Id$
|
||||
*/
|
||||
public class ThaiWordFilterFactory extends BaseTokenFilterFactory {
|
||||
public ThaiWordFilter create(TokenStream input) {
|
||||
assureMatchVersion();
|
||||
|
|
|
@ -22,7 +22,17 @@ package org.apache.solr.analysis;
|
|||
import org.apache.lucene.analysis.payloads.TokenOffsetPayloadTokenFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
|
||||
/** Factory for {@link TokenOffsetPayloadTokenFilter} */
|
||||
/**
|
||||
* Factory for {@link TokenOffsetPayloadTokenFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_tokenoffset" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <filter class="solr.TokenOffsetPayloadTokenFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
* @version $Id$
|
||||
*/
|
||||
public class TokenOffsetPayloadTokenFilterFactory extends BaseTokenFilterFactory {
|
||||
public TokenOffsetPayloadTokenFilter create(TokenStream input) {
|
||||
return new TokenOffsetPayloadTokenFilter(input);
|
||||
|
|
|
@ -24,6 +24,14 @@ import org.apache.lucene.analysis.miscellaneous.TrimFilter;
|
|||
import org.apache.solr.common.SolrException;
|
||||
|
||||
/**
|
||||
* Factory for {@link TrimFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_trm" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.NGramTokenizerFactory"/>
|
||||
* <filter class="solr.TrimFilterFactory" updateOffsets="false"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
* @version $Id$
|
||||
* @see TrimFilter
|
||||
*/
|
||||
|
|
|
@ -20,7 +20,17 @@ package org.apache.solr.analysis;
|
|||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.tr.TurkishLowerCaseFilter;
|
||||
|
||||
/** Factory for {@link TurkishLowerCaseFilter} */
|
||||
/**
|
||||
* Factory for {@link TurkishLowerCaseFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_trlwr" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <filter class="solr.TurkishLowerCaseFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
* @version $Id$
|
||||
*/
|
||||
public class TurkishLowerCaseFilterFactory extends BaseTokenFilterFactory {
|
||||
public TokenStream create(TokenStream input) {
|
||||
return new TurkishLowerCaseFilter(input);
|
||||
|
|
|
@ -22,7 +22,17 @@ package org.apache.solr.analysis;
|
|||
import org.apache.lucene.analysis.payloads.TypeAsPayloadTokenFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
|
||||
/** Factory for {@link TypeAsPayloadTokenFilter} */
|
||||
/**
|
||||
* Factory for {@link TypeAsPayloadTokenFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_typeaspayload" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <filter class="solr.TypeAsPayloadTokenFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
* @version $Id$
|
||||
*/
|
||||
public class TypeAsPayloadTokenFilterFactory extends BaseTokenFilterFactory {
|
||||
public TypeAsPayloadTokenFilter create(TokenStream input) {
|
||||
return new TypeAsPayloadTokenFilter(input);
|
||||
|
|
|
@ -27,6 +27,13 @@ import java.io.Reader;
|
|||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Factory for {@link UAX29URLEmailTokenizer}.
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_urlemail" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.UAX29URLEmailTokenizerFactory" maxTokenLength="255"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
* @version $Id$
|
||||
*
|
||||
*/
|
||||
|
|
|
@ -23,6 +23,13 @@ import java.io.Reader;
|
|||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Factory for {@link WhitespaceTokenizer}.
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
* @version $Id$
|
||||
*/
|
||||
public class WhitespaceTokenizerFactory extends BaseTokenizerFactory {
|
||||
|
|
|
@ -22,7 +22,16 @@ import java.io.Reader;
|
|||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.wikipedia.WikipediaTokenizer;
|
||||
|
||||
/** Factory for {@link WikipediaTokenizer}*/
|
||||
/**
|
||||
* Factory for {@link WikipediaTokenizer}.
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_wiki" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WikipediaTokenizerFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
* @version $Id$
|
||||
*/
|
||||
public class WikipediaTokenizerFactory extends BaseTokenizerFactory {
|
||||
// TODO: add support for WikipediaTokenizer's advanced options.
|
||||
public Tokenizer create(Reader input) {
|
||||
|
|
|
@ -37,6 +37,17 @@ import java.io.IOException;
|
|||
|
||||
|
||||
/**
|
||||
* Factory for {@link WordDelimiterFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_wd" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <filter class="solr.WordDelimiterFilterFactory" protected="protectedword.txt"
|
||||
* preserveOriginal="0" splitOnNumerics="1" splitOnCaseChange="1"
|
||||
* catenateWords="0" catenateNumbers="0" catenateAll="0"
|
||||
* generateWordParts="1" generateNumberParts="1" stemEnglishPossessive="1"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
* @version $Id$
|
||||
*/
|
||||
public class WordDelimiterFilterFactory extends BaseTokenFilterFactory implements ResourceLoaderAware {
|
||||
|
|
Loading…
Reference in New Issue