mirror of https://github.com/apache/lucene.git
LUCENE-8498: Remove LowerCaseTokenizer
parent 52bdcf6bb0
commit c0d2975970
@@ -81,6 +81,9 @@ API Changes

* LUCENE-8352: TokenStreamComponents is now final, and can take a Consumer<Reader>
  in its constructor (Mark Harwood, Alan Woodward, Adrien Grand)

* LUCENE-8498: LowerCaseTokenizer has been removed, and CharTokenizer no longer
  takes a normalizer function. (Alan Woodward)

Changes in Runtime Behavior

* LUCENE-8333: Switch MoreLikeThis.setMaxDocFreqPct to use maxDoc instead of
@@ -129,3 +129,14 @@ Most code should just require recompilation, though possibly requiring some adde

Instead of overriding TokenStreamComponents#setReader() to customise analyzer
initialisation, you should now pass a Consumer<Reader> instance to the
TokenStreamComponents constructor.
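As a rough sketch of the new pattern (the analyzer class name below is hypothetical, and this assumes the (Consumer<Reader>, TokenStream) constructor introduced by LUCENE-8352), the reader is forwarded to the tokenizer through the consumer rather than through a setReader() override:

```java
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.LetterTokenizer;

public final class MyAnalyzer extends Analyzer {
  @Override
  protected TokenStreamComponents createComponents(String fieldName) {
    Tokenizer source = new LetterTokenizer();
    TokenStream result = new LowerCaseFilter(source);
    // The Consumer<Reader> receives the reader for each new document and
    // hands it to the tokenizer, replacing the old setReader() override.
    return new TokenStreamComponents(r -> source.setReader(r), result);
  }
}
```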

## LowerCaseTokenizer and LowerCaseTokenizerFactory have been removed ##

LowerCaseTokenizer combined tokenization and filtering in a way that broke token
normalization, so they have been removed. Instead, use a LetterTokenizer followed by
a LowerCaseFilter.
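A minimal, self-contained sketch of the replacement (the class name and sample text are illustrative only); this mirrors the change made to SimpleAnalyzer and StopAnalyzer in this commit:

```java
import java.io.IOException;
import java.io.StringReader;

import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.LetterTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class LowerCaseTokenizerMigration {
  public static void main(String[] args) throws IOException {
    // Before: Tokenizer tok = new LowerCaseTokenizer();
    // After: tokenize on letters, then lowercase in a separate TokenFilter.
    Tokenizer source = new LetterTokenizer();
    TokenStream stream = new LowerCaseFilter(source);
    source.setReader(new StringReader("Some Mixed CASE Text"));

    CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
    stream.reset();
    while (stream.incrementToken()) {
      System.out.println(term.toString()); // some, mixed, case, text
    }
    stream.end();
    stream.close();
  }
}
```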

## CharTokenizer no longer takes a normalizer function ##

CharTokenizer now only performs tokenization. To perform any type of filtering,
use a TokenFilter chain as you would with any other Tokenizer.
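A minimal sketch of the new pattern, following the test changes in this commit (the class name and sample text are illustrative): build the tokenizer from a character predicate only, and do the lowercasing in a filter:

```java
import java.io.IOException;
import java.io.StringReader;

import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.util.CharTokenizer;

public class CharTokenizerMigration {
  public static void main(String[] args) throws IOException {
    // Before: CharTokenizer.fromTokenCharPredicate(Character::isLetter, Character::toLowerCase)
    // After: the predicate only picks token characters; normalization moves into a TokenFilter.
    Tokenizer tokenizer = CharTokenizer.fromTokenCharPredicate(Character::isLetter);
    TokenStream stream = new LowerCaseFilter(tokenizer);
    tokenizer.setReader(new StringReader("Tokenizer Test"));

    CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
    stream.reset();
    while (stream.incrementToken()) {
      System.out.println(term.toString()); // tokenizer, test
    }
    stream.end();
    stream.close();
  }
}
```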
@ -1,72 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.analysis.core;
|
||||
|
||||
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.util.AttributeFactory;
|
||||
|
||||
/**
|
||||
* LowerCaseTokenizer performs the function of LetterTokenizer
|
||||
* and LowerCaseFilter together. It divides text at non-letters and converts
|
||||
* them to lower case. While it is functionally equivalent to the combination
|
||||
* of LetterTokenizer and LowerCaseFilter, there is a performance advantage
|
||||
* to doing the two tasks at once, hence this (redundant) implementation.
|
||||
* <P>
|
||||
* Note: this does a decent job for most European languages, but does a terrible
|
||||
* job for some Asian languages, where words are not separated by spaces.
|
||||
* </p>
|
||||
*/
|
||||
public final class LowerCaseTokenizer extends LetterTokenizer {
|
||||
|
||||
/**
|
||||
* Construct a new LowerCaseTokenizer.
|
||||
*/
|
||||
public LowerCaseTokenizer() {
|
||||
}
|
||||
|
||||
/**
|
||||
* Construct a new LowerCaseTokenizer using a given
|
||||
* {@link org.apache.lucene.util.AttributeFactory}.
|
||||
*
|
||||
* @param factory
|
||||
* the attribute factory to use for this {@link Tokenizer}
|
||||
*/
|
||||
public LowerCaseTokenizer(AttributeFactory factory) {
|
||||
super(factory);
|
||||
}
|
||||
|
||||
/**
|
||||
* Construct a new LowerCaseTokenizer using a given
|
||||
* {@link org.apache.lucene.util.AttributeFactory}.
|
||||
*
|
||||
* @param factory the attribute factory to use for this {@link Tokenizer}
|
||||
* @param maxTokenLen maximum token length the tokenizer will emit.
|
||||
* Must be greater than 0 and less than MAX_TOKEN_LENGTH_LIMIT (1024*1024)
|
||||
* @throws IllegalArgumentException if maxTokenLen is invalid.
|
||||
*/
|
||||
public LowerCaseTokenizer(AttributeFactory factory, int maxTokenLen) {
|
||||
super(factory, maxTokenLen);
|
||||
}
|
||||
|
||||
/** Converts char to lower case
|
||||
* {@link Character#toLowerCase(int)}.*/
|
||||
@Override
|
||||
protected int normalize(int c) {
|
||||
return Character.toLowerCase(c);
|
||||
}
|
||||
}
|
|
@ -1,75 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.analysis.core;
|
||||
|
||||
|
||||
import org.apache.lucene.analysis.util.AbstractAnalysisFactory;
|
||||
import org.apache.lucene.analysis.util.CharTokenizer;
|
||||
import org.apache.lucene.analysis.util.MultiTermAwareComponent;
|
||||
import org.apache.lucene.analysis.util.TokenizerFactory;
|
||||
import org.apache.lucene.util.AttributeFactory;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import static org.apache.lucene.analysis.standard.StandardTokenizer.MAX_TOKEN_LENGTH_LIMIT;
|
||||
|
||||
/**
|
||||
* Factory for {@link LowerCaseTokenizer}.
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="text_lwrcase" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.LowerCaseTokenizerFactory" maxTokenLen="256"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
* <p>
|
||||
* Options:
|
||||
* <ul>
|
||||
* <li>maxTokenLen: max token length, should be greater than 0 and less than MAX_TOKEN_LENGTH_LIMIT (1024*1024).
|
||||
* It is rare to need to change this
|
||||
* else {@link CharTokenizer}::DEFAULT_MAX_WORD_LEN</li>
|
||||
* </ul>
|
||||
*/
|
||||
public class LowerCaseTokenizerFactory extends TokenizerFactory implements MultiTermAwareComponent {
|
||||
private final int maxTokenLen;
|
||||
|
||||
/**
|
||||
* Creates a new LowerCaseTokenizerFactory
|
||||
*/
|
||||
public LowerCaseTokenizerFactory(Map<String, String> args) {
|
||||
super(args);
|
||||
maxTokenLen = getInt(args, "maxTokenLen", CharTokenizer.DEFAULT_MAX_WORD_LEN);
|
||||
if (maxTokenLen > MAX_TOKEN_LENGTH_LIMIT || maxTokenLen <= 0) {
|
||||
throw new IllegalArgumentException("maxTokenLen must be greater than 0 and less than " + MAX_TOKEN_LENGTH_LIMIT + " passed: " + maxTokenLen);
|
||||
}
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public LowerCaseTokenizer create(AttributeFactory factory) {
|
||||
return new LowerCaseTokenizer(factory, maxTokenLen);
|
||||
}
|
||||
|
||||
@Override
|
||||
public AbstractAnalysisFactory getMultiTermComponent() {
|
||||
Map<String,String> map = new HashMap<>(getOriginalArgs());
|
||||
map.remove("maxTokenLen"); //removing "maxTokenLen" argument for LowerCaseFilterFactory init
|
||||
return new LowerCaseFilterFactory(map);
|
||||
}
|
||||
}
|
|
@ -20,6 +20,7 @@ package org.apache.lucene.analysis.core;
|
|||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
|
||||
/** An {@link Analyzer} that filters {@link LetterTokenizer}
|
||||
* with {@link LowerCaseFilter}
|
||||
|
@ -34,7 +35,8 @@ public final class SimpleAnalyzer extends Analyzer {
|
|||
|
||||
@Override
|
||||
protected TokenStreamComponents createComponents(final String fieldName) {
|
||||
return new TokenStreamComponents(new LowerCaseTokenizer());
|
||||
Tokenizer tokenizer = new LetterTokenizer();
|
||||
return new TokenStreamComponents(tokenizer, new LowerCaseFilter(tokenizer));
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -60,13 +60,13 @@ public final class StopAnalyzer extends StopwordAnalyzerBase {
|
|||
* used to tokenize all the text in the provided {@link Reader}.
|
||||
*
|
||||
* @return {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
|
||||
* built from a {@link LowerCaseTokenizer} filtered with
|
||||
* built from a {@link LetterTokenizer} filtered with
|
||||
* {@link StopFilter}
|
||||
*/
|
||||
@Override
|
||||
protected TokenStreamComponents createComponents(String fieldName) {
|
||||
final Tokenizer source = new LowerCaseTokenizer();
|
||||
return new TokenStreamComponents(source, new StopFilter(source, stopwords));
|
||||
final Tokenizer source = new LetterTokenizer();
|
||||
return new TokenStreamComponents(source, new StopFilter(new LowerCaseFilter(source), stopwords));
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -20,14 +20,11 @@ package org.apache.lucene.analysis.util;
|
|||
import java.io.IOException;
|
||||
import java.util.Objects;
|
||||
import java.util.function.IntPredicate;
|
||||
import java.util.function.IntUnaryOperator;
|
||||
|
||||
import org.apache.lucene.analysis.CharacterUtils.CharacterBuffer;
|
||||
import org.apache.lucene.analysis.CharacterUtils;
|
||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.CharacterUtils.CharacterBuffer;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.core.LetterTokenizer;
|
||||
import org.apache.lucene.analysis.core.LowerCaseTokenizer;
|
||||
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||
|
@ -107,48 +104,12 @@ public abstract class CharTokenizer extends Tokenizer {
|
|||
* </pre>
|
||||
*/
|
||||
public static CharTokenizer fromTokenCharPredicate(AttributeFactory factory, final IntPredicate tokenCharPredicate) {
|
||||
return fromTokenCharPredicate(factory, tokenCharPredicate, IntUnaryOperator.identity());
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a new instance of CharTokenizer using a custom predicate, supplied as method reference or lambda expression.
|
||||
* The predicate should return {@code true} for all valid token characters.
|
||||
* This factory also takes a function to normalize chars, e.g., lowercasing them, supplied as method reference or lambda expression.
|
||||
* <p>
|
||||
* This factory is intended to be used with lambdas or method references. E.g., an elegant way
|
||||
* to create an instance which behaves exactly as {@link LowerCaseTokenizer} is:
|
||||
* <pre class="prettyprint lang-java">
|
||||
* Tokenizer tok = CharTokenizer.fromTokenCharPredicate(Character::isLetter, Character::toLowerCase);
|
||||
* </pre>
|
||||
*/
|
||||
public static CharTokenizer fromTokenCharPredicate(final IntPredicate tokenCharPredicate, final IntUnaryOperator normalizer) {
|
||||
return fromTokenCharPredicate(DEFAULT_TOKEN_ATTRIBUTE_FACTORY, tokenCharPredicate, normalizer);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a new instance of CharTokenizer with the supplied attribute factory using a custom predicate, supplied as method reference or lambda expression.
|
||||
* The predicate should return {@code true} for all valid token characters.
|
||||
* This factory also takes a function to normalize chars, e.g., lowercasing them, supplied as method reference or lambda expression.
|
||||
* <p>
|
||||
* This factory is intended to be used with lambdas or method references. E.g., an elegant way
|
||||
* to create an instance which behaves exactly as {@link LowerCaseTokenizer} is:
|
||||
* <pre class="prettyprint lang-java">
|
||||
* Tokenizer tok = CharTokenizer.fromTokenCharPredicate(factory, Character::isLetter, Character::toLowerCase);
|
||||
* </pre>
|
||||
*/
|
||||
public static CharTokenizer fromTokenCharPredicate(AttributeFactory factory, final IntPredicate tokenCharPredicate, final IntUnaryOperator normalizer) {
|
||||
Objects.requireNonNull(tokenCharPredicate, "predicate must not be null.");
|
||||
Objects.requireNonNull(normalizer, "normalizer must not be null");
|
||||
return new CharTokenizer(factory) {
|
||||
@Override
|
||||
protected boolean isTokenChar(int c) {
|
||||
return tokenCharPredicate.test(c);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int normalize(int c) {
|
||||
return normalizer.applyAsInt(c);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
|
@ -167,7 +128,7 @@ public abstract class CharTokenizer extends Tokenizer {
|
|||
public static CharTokenizer fromSeparatorCharPredicate(final IntPredicate separatorCharPredicate) {
|
||||
return fromSeparatorCharPredicate(DEFAULT_TOKEN_ATTRIBUTE_FACTORY, separatorCharPredicate);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Creates a new instance of CharTokenizer with the supplied attribute factory using a custom predicate, supplied as method reference or lambda expression.
|
||||
* The predicate should return {@code true} for all valid token separator characters.
|
||||
|
@ -179,37 +140,7 @@ public abstract class CharTokenizer extends Tokenizer {
|
|||
* </pre>
|
||||
*/
|
||||
public static CharTokenizer fromSeparatorCharPredicate(AttributeFactory factory, final IntPredicate separatorCharPredicate) {
|
||||
return fromSeparatorCharPredicate(factory, separatorCharPredicate, IntUnaryOperator.identity());
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a new instance of CharTokenizer using a custom predicate, supplied as method reference or lambda expression.
|
||||
* The predicate should return {@code true} for all valid token separator characters.
|
||||
* This factory also takes a function to normalize chars, e.g., lowercasing them, supplied as method reference or lambda expression.
|
||||
* <p>
|
||||
* This factory is intended to be used with lambdas or method references. E.g., an elegant way
|
||||
* to create an instance which behaves exactly as the combination {@link WhitespaceTokenizer} and {@link LowerCaseFilter} is:
|
||||
* <pre class="prettyprint lang-java">
|
||||
* Tokenizer tok = CharTokenizer.fromSeparatorCharPredicate(Character::isWhitespace, Character::toLowerCase);
|
||||
* </pre>
|
||||
*/
|
||||
public static CharTokenizer fromSeparatorCharPredicate(final IntPredicate separatorCharPredicate, final IntUnaryOperator normalizer) {
|
||||
return fromSeparatorCharPredicate(DEFAULT_TOKEN_ATTRIBUTE_FACTORY, separatorCharPredicate, normalizer);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a new instance of CharTokenizer with the supplied attribute factory using a custom predicate.
|
||||
* The predicate should return {@code true} for all valid token separator characters.
|
||||
* This factory also takes a function to normalize chars, e.g., lowercasing them, supplied as method reference or lambda expression.
|
||||
* <p>
|
||||
* This factory is intended to be used with lambdas or method references. E.g., an elegant way
|
||||
* to create an instance which behaves exactly as {@link WhitespaceTokenizer} and {@link LowerCaseFilter} is:
|
||||
* <pre class="prettyprint lang-java">
|
||||
* Tokenizer tok = CharTokenizer.fromSeparatorCharPredicate(factory, Character::isWhitespace, Character::toLowerCase);
|
||||
* </pre>
|
||||
*/
|
||||
public static CharTokenizer fromSeparatorCharPredicate(AttributeFactory factory, final IntPredicate separatorCharPredicate, final IntUnaryOperator normalizer) {
|
||||
return fromTokenCharPredicate(factory, separatorCharPredicate.negate(), normalizer);
|
||||
return fromTokenCharPredicate(factory, separatorCharPredicate.negate());
|
||||
}
|
||||
|
||||
private int offset = 0, bufferIndex = 0, dataLen = 0, finalOffset = 0;
|
||||
|
@ -230,15 +161,6 @@ public abstract class CharTokenizer extends Tokenizer {
|
|||
*/
|
||||
protected abstract boolean isTokenChar(int c);
|
||||
|
||||
/**
|
||||
* Called on each token character to normalize it before it is added to the
|
||||
* token. The default implementation does nothing. Subclasses may use this to,
|
||||
* e.g., lowercase tokens.
|
||||
*/
|
||||
protected int normalize(int c) {
|
||||
return c;
|
||||
}
|
||||
|
||||
@Override
|
||||
public final boolean incrementToken() throws IOException {
|
||||
clearAttributes();
|
||||
|
@ -276,7 +198,7 @@ public abstract class CharTokenizer extends Tokenizer {
|
|||
buffer = termAtt.resizeBuffer(2+length); // make sure a supplementary fits in the buffer
|
||||
}
|
||||
end += charCount;
|
||||
length += Character.toChars(normalize(c), buffer, length); // buffer it, normalized
|
||||
length += Character.toChars(c, buffer, length); // buffer it, normalized
|
||||
if (length >= maxTokenLen) { // buffer overflow! make sure to check for >= surrogate pair could break == test
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -15,7 +15,6 @@
|
|||
|
||||
org.apache.lucene.analysis.core.KeywordTokenizerFactory
|
||||
org.apache.lucene.analysis.core.LetterTokenizerFactory
|
||||
org.apache.lucene.analysis.core.LowerCaseTokenizerFactory
|
||||
org.apache.lucene.analysis.core.WhitespaceTokenizerFactory
|
||||
org.apache.lucene.analysis.ngram.EdgeNGramTokenizerFactory
|
||||
org.apache.lucene.analysis.ngram.NGramTokenizerFactory
|
||||
|
|
|
@ -25,7 +25,8 @@ import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
|||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.core.KeywordTokenizer;
|
||||
import org.apache.lucene.analysis.core.LowerCaseTokenizer;
|
||||
import org.apache.lucene.analysis.core.LetterTokenizer;
|
||||
import org.apache.lucene.analysis.core.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
|
||||
|
||||
/**
|
||||
|
@ -147,9 +148,9 @@ public class TestBrazilianAnalyzer extends BaseTokenStreamTestCase {
|
|||
public void testWithKeywordAttribute() throws IOException {
|
||||
CharArraySet set = new CharArraySet(1, true);
|
||||
set.add("Brasília");
|
||||
Tokenizer tokenizer = new LowerCaseTokenizer();
|
||||
Tokenizer tokenizer = new LetterTokenizer();
|
||||
tokenizer.setReader(new StringReader("Brasília Brasilia"));
|
||||
BrazilianStemFilter filter = new BrazilianStemFilter(new SetKeywordMarkerFilter(tokenizer, set));
|
||||
BrazilianStemFilter filter = new BrazilianStemFilter(new SetKeywordMarkerFilter(new LowerCaseFilter(tokenizer), set));
|
||||
|
||||
assertTokenStreamContents(filter, new String[] { "brasília", "brasil" });
|
||||
}
|
||||
|
|
|
@ -216,14 +216,6 @@ public class TestAnalyzers extends BaseTokenStreamTestCase {
|
|||
int length = highSurEndingLower.length();
|
||||
assertEquals('\ud801', termBuffer[length - 1]);
|
||||
}
|
||||
|
||||
public void testLowerCaseTokenizer() throws IOException {
|
||||
StringReader reader = new StringReader("Tokenizer \ud801\udc1ctest");
|
||||
LowerCaseTokenizer tokenizer = new LowerCaseTokenizer();
|
||||
tokenizer.setReader(reader);
|
||||
assertTokenStreamContents(tokenizer, new String[] { "tokenizer",
|
||||
"\ud801\udc44test" });
|
||||
}
|
||||
|
||||
public void testWhitespaceTokenizer() throws IOException {
|
||||
StringReader reader = new StringReader("Tokenizer \ud801\udc1ctest");
|
||||
|
|
|
@ -31,9 +31,8 @@ import org.apache.lucene.analysis.TokenStream;
|
|||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.charfilter.HTMLStripCharFilterFactory;
|
||||
import org.apache.lucene.analysis.charfilter.MappingCharFilterFactory;
|
||||
import org.apache.lucene.analysis.core.LetterTokenizer;
|
||||
import org.apache.lucene.analysis.core.LowerCaseFilterFactory;
|
||||
import org.apache.lucene.analysis.core.LowerCaseTokenizer;
|
||||
import org.apache.lucene.analysis.core.LowerCaseTokenizerFactory;
|
||||
import org.apache.lucene.analysis.core.StopFilterFactory;
|
||||
import org.apache.lucene.analysis.core.WhitespaceTokenizerFactory;
|
||||
import org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilterFactory;
|
||||
|
@ -419,7 +418,7 @@ public class TestCustomAnalyzer extends BaseTokenStreamTestCase {
|
|||
|
||||
@Override
|
||||
public Tokenizer create(AttributeFactory factory) {
|
||||
return new LowerCaseTokenizer(factory);
|
||||
return new LetterTokenizer(factory);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -500,14 +499,6 @@ public class TestCustomAnalyzer extends BaseTokenStreamTestCase {
|
|||
.build();
|
||||
assertEquals(new BytesRef("e f c"), analyzer.normalize("dummy", "a b c"));
|
||||
}
|
||||
|
||||
/** test normalize where the TokenizerFactory returns a filter to normalize the text */
|
||||
public void testNormalizationWithLowerCaseTokenizer() throws IOException {
|
||||
CustomAnalyzer analyzer1 = CustomAnalyzer.builder()
|
||||
.withTokenizer(LowerCaseTokenizerFactory.class, Collections.emptyMap())
|
||||
.build();
|
||||
assertEquals(new BytesRef("abc"), analyzer1.normalize("dummy", "ABC"));
|
||||
}
|
||||
|
||||
public void testConditions() throws IOException {
|
||||
CustomAnalyzer analyzer = CustomAnalyzer.builder()
|
||||
|
|
|
@ -23,7 +23,9 @@ import java.io.StringReader;
|
|||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.core.LowerCaseTokenizer;
|
||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.core.LetterTokenizer;
|
||||
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
|
||||
|
||||
public class TestGermanAnalyzer extends BaseTokenStreamTestCase {
|
||||
|
@ -38,10 +40,10 @@ public class TestGermanAnalyzer extends BaseTokenStreamTestCase {
|
|||
public void testWithKeywordAttribute() throws IOException {
|
||||
CharArraySet set = new CharArraySet( 1, true);
|
||||
set.add("fischen");
|
||||
final LowerCaseTokenizer in = new LowerCaseTokenizer();
|
||||
final Tokenizer in = new LetterTokenizer();
|
||||
in.setReader(new StringReader("Fischen Trinken"));
|
||||
GermanStemFilter filter = new GermanStemFilter(
|
||||
new SetKeywordMarkerFilter(in, set));
|
||||
new SetKeywordMarkerFilter(new LowerCaseFilter(in), set));
|
||||
assertTokenStreamContents(filter, new String[] { "fischen", "trink" });
|
||||
}
|
||||
|
||||
|
|
|
@ -125,17 +125,6 @@ public class TestStandardFactories extends BaseTokenStreamFactoryTestCase {
|
|||
new String[] { "What", "s", "this", "thing", "do" });
|
||||
}
|
||||
|
||||
/**
|
||||
* Test LowerCaseTokenizerFactory
|
||||
*/
|
||||
public void testLowerCaseTokenizer() throws Exception {
|
||||
Reader reader = new StringReader("What's this thing do?");
|
||||
Tokenizer stream = tokenizerFactory("LowerCase").create(newAttributeFactory());
|
||||
stream.setReader(reader);
|
||||
assertTokenStreamContents(stream,
|
||||
new String[] { "what", "s", "this", "thing", "do" });
|
||||
}
|
||||
|
||||
/**
|
||||
* Ensure the ASCIIFoldingFilterFactory works
|
||||
*/
|
||||
|
@ -168,11 +157,6 @@ public class TestStandardFactories extends BaseTokenStreamFactoryTestCase {
|
|||
});
|
||||
assertTrue(expected.getMessage().contains("Unknown parameters"));
|
||||
|
||||
expected = expectThrows(IllegalArgumentException.class, () -> {
|
||||
tokenizerFactory("LowerCase", "bogusArg", "bogusValue");
|
||||
});
|
||||
assertTrue(expected.getMessage().contains("Unknown parameters"));
|
||||
|
||||
expected = expectThrows(IllegalArgumentException.class, () -> {
|
||||
tokenFilterFactory("ASCIIFolding", "bogusArg", "bogusValue");
|
||||
});
|
||||
|
|
|
@ -21,16 +21,12 @@ import java.io.IOException;
|
|||
import java.io.StringReader;
|
||||
import java.util.Locale;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.core.KeywordTokenizer;
|
||||
import org.apache.lucene.analysis.core.LetterTokenizer;
|
||||
import org.apache.lucene.analysis.core.LowerCaseTokenizer;
|
||||
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
|
||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
|
||||
|
||||
/**
|
||||
|
@ -54,9 +50,9 @@ public class TestCharTokenizers extends BaseTokenStreamTestCase {
|
|||
}
|
||||
// internal buffer size is 1024 make sure we have a surrogate pair right at the border
|
||||
builder.insert(1023, "\ud801\udc1c");
|
||||
Tokenizer tokenizer = new LowerCaseTokenizer(newAttributeFactory());
|
||||
Tokenizer tokenizer = new LetterTokenizer(newAttributeFactory());
|
||||
tokenizer.setReader(new StringReader(builder.toString()));
|
||||
assertTokenStreamContents(tokenizer, builder.toString().toLowerCase(Locale.ROOT).split(" "));
|
||||
assertTokenStreamContents(new LowerCaseFilter(tokenizer), builder.toString().toLowerCase(Locale.ROOT).split(" "));
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -72,9 +68,9 @@ public class TestCharTokenizers extends BaseTokenStreamTestCase {
|
|||
builder.append("a");
|
||||
}
|
||||
builder.append("\ud801\udc1cabc");
|
||||
Tokenizer tokenizer = new LowerCaseTokenizer(newAttributeFactory());
|
||||
Tokenizer tokenizer = new LetterTokenizer(newAttributeFactory());
|
||||
tokenizer.setReader(new StringReader(builder.toString()));
|
||||
assertTokenStreamContents(tokenizer, new String[] {builder.toString().toLowerCase(Locale.ROOT)});
|
||||
assertTokenStreamContents(new LowerCaseFilter(tokenizer), new String[] {builder.toString().toLowerCase(Locale.ROOT)});
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -87,9 +83,9 @@ public class TestCharTokenizers extends BaseTokenStreamTestCase {
|
|||
for (int i = 0; i < 255; i++) {
|
||||
builder.append("A");
|
||||
}
|
||||
Tokenizer tokenizer = new LowerCaseTokenizer(newAttributeFactory());
|
||||
Tokenizer tokenizer = new LetterTokenizer(newAttributeFactory());
|
||||
tokenizer.setReader(new StringReader(builder.toString() + builder.toString()));
|
||||
assertTokenStreamContents(tokenizer, new String[] {builder.toString().toLowerCase(Locale.ROOT), builder.toString().toLowerCase(Locale.ROOT)});
|
||||
assertTokenStreamContents(new LowerCaseFilter(tokenizer), new String[] {builder.toString().toLowerCase(Locale.ROOT), builder.toString().toLowerCase(Locale.ROOT)});
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -101,14 +97,14 @@ public class TestCharTokenizers extends BaseTokenStreamTestCase {
|
|||
for (int i = 0; i < 100; i++) {
|
||||
builder.append("A");
|
||||
}
|
||||
Tokenizer tokenizer = new LowerCaseTokenizer(newAttributeFactory(), 100);
|
||||
Tokenizer tokenizer = new LetterTokenizer(newAttributeFactory(), 100);
|
||||
// Tricky, passing two copies of the string to the reader....
|
||||
tokenizer.setReader(new StringReader(builder.toString() + builder.toString()));
|
||||
assertTokenStreamContents(tokenizer, new String[]{builder.toString().toLowerCase(Locale.ROOT),
|
||||
assertTokenStreamContents(new LowerCaseFilter(tokenizer), new String[]{builder.toString().toLowerCase(Locale.ROOT),
|
||||
builder.toString().toLowerCase(Locale.ROOT) });
|
||||
|
||||
Exception e = expectThrows(IllegalArgumentException.class, () ->
|
||||
new LowerCaseTokenizer(newAttributeFactory(), -1));
|
||||
new LetterTokenizer(newAttributeFactory(), -1));
|
||||
assertEquals("maxTokenLen must be greater than 0 and less than 1048576 passed: -1", e.getMessage());
|
||||
|
||||
tokenizer = new LetterTokenizer(newAttributeFactory(), 100);
|
||||
|
@ -134,16 +130,16 @@ public class TestCharTokenizers extends BaseTokenStreamTestCase {
|
|||
}
|
||||
|
||||
e = expectThrows(IllegalArgumentException.class, () ->
|
||||
new LowerCaseTokenizer(newAttributeFactory(), 0));
|
||||
new LetterTokenizer(newAttributeFactory(), 0));
|
||||
assertEquals("maxTokenLen must be greater than 0 and less than 1048576 passed: 0", e.getMessage());
|
||||
|
||||
e = expectThrows(IllegalArgumentException.class, () ->
|
||||
new LowerCaseTokenizer(newAttributeFactory(), 10_000_000));
|
||||
new LetterTokenizer(newAttributeFactory(), 10_000_000));
|
||||
assertEquals("maxTokenLen must be greater than 0 and less than 1048576 passed: 10000000", e.getMessage());
|
||||
|
||||
tokenizer = new LowerCaseTokenizer(newAttributeFactory(), 4800);
|
||||
tokenizer = new LetterTokenizer(newAttributeFactory(), 4800);
|
||||
tokenizer.setReader(new StringReader(builder.toString()));
|
||||
assertTokenStreamContents(tokenizer, new String[]{builder.toString().toLowerCase(Locale.ROOT)});
|
||||
assertTokenStreamContents(new LowerCaseFilter(tokenizer), new String[]{builder.toString().toLowerCase(Locale.ROOT)});
|
||||
|
||||
|
||||
e = expectThrows(IllegalArgumentException.class, () ->
|
||||
|
@ -195,87 +191,9 @@ public class TestCharTokenizers extends BaseTokenStreamTestCase {
|
|||
builder.append("A");
|
||||
}
|
||||
builder.append("\ud801\udc1c");
|
||||
Tokenizer tokenizer = new LowerCaseTokenizer(newAttributeFactory());
|
||||
Tokenizer tokenizer = new LetterTokenizer(newAttributeFactory());
|
||||
tokenizer.setReader(new StringReader(builder.toString() + builder.toString()));
|
||||
assertTokenStreamContents(tokenizer, new String[] {builder.toString().toLowerCase(Locale.ROOT), builder.toString().toLowerCase(Locale.ROOT)});
|
||||
}
|
||||
|
||||
// LUCENE-3642: normalize SMP->BMP and check that offsets are correct
|
||||
public void testCrossPlaneNormalization() throws IOException {
|
||||
Analyzer analyzer = new Analyzer() {
|
||||
@Override
|
||||
protected TokenStreamComponents createComponents(String fieldName) {
|
||||
Tokenizer tokenizer = new LetterTokenizer(newAttributeFactory()) {
|
||||
@Override
|
||||
protected int normalize(int c) {
|
||||
if (c > 0xffff) {
|
||||
return 'δ';
|
||||
} else {
|
||||
return c;
|
||||
}
|
||||
}
|
||||
};
|
||||
return new TokenStreamComponents(tokenizer, tokenizer);
|
||||
}
|
||||
};
|
||||
int num = 1000 * RANDOM_MULTIPLIER;
|
||||
for (int i = 0; i < num; i++) {
|
||||
String s = TestUtil.randomUnicodeString(random());
|
||||
try (TokenStream ts = analyzer.tokenStream("foo", s)) {
|
||||
ts.reset();
|
||||
OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
|
||||
while (ts.incrementToken()) {
|
||||
String highlightedText = s.substring(offsetAtt.startOffset(), offsetAtt.endOffset());
|
||||
for (int j = 0, cp = 0; j < highlightedText.length(); j += Character.charCount(cp)) {
|
||||
cp = highlightedText.codePointAt(j);
|
||||
assertTrue("non-letter:" + Integer.toHexString(cp), Character.isLetter(cp));
|
||||
}
|
||||
}
|
||||
ts.end();
|
||||
}
|
||||
}
|
||||
// just for fun
|
||||
checkRandomData(random(), analyzer, num);
|
||||
analyzer.close();
|
||||
}
|
||||
|
||||
// LUCENE-3642: normalize BMP->SMP and check that offsets are correct
|
||||
public void testCrossPlaneNormalization2() throws IOException {
|
||||
Analyzer analyzer = new Analyzer() {
|
||||
@Override
|
||||
protected TokenStreamComponents createComponents(String fieldName) {
|
||||
Tokenizer tokenizer = new LetterTokenizer(newAttributeFactory()) {
|
||||
@Override
|
||||
protected int normalize(int c) {
|
||||
if (c <= 0xffff) {
|
||||
return 0x1043C;
|
||||
} else {
|
||||
return c;
|
||||
}
|
||||
}
|
||||
};
|
||||
return new TokenStreamComponents(tokenizer, tokenizer);
|
||||
}
|
||||
};
|
||||
int num = 1000 * RANDOM_MULTIPLIER;
|
||||
for (int i = 0; i < num; i++) {
|
||||
String s = TestUtil.randomUnicodeString(random());
|
||||
try (TokenStream ts = analyzer.tokenStream("foo", s)) {
|
||||
ts.reset();
|
||||
OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
|
||||
while (ts.incrementToken()) {
|
||||
String highlightedText = s.substring(offsetAtt.startOffset(), offsetAtt.endOffset());
|
||||
for (int j = 0, cp = 0; j < highlightedText.length(); j += Character.charCount(cp)) {
|
||||
cp = highlightedText.codePointAt(j);
|
||||
assertTrue("non-letter:" + Integer.toHexString(cp), Character.isLetter(cp));
|
||||
}
|
||||
}
|
||||
ts.end();
|
||||
}
|
||||
}
|
||||
// just for fun
|
||||
checkRandomData(random(), analyzer, num);
|
||||
analyzer.close();
|
||||
assertTokenStreamContents(new LowerCaseFilter(tokenizer), new String[] {builder.toString().toLowerCase(Locale.ROOT), builder.toString().toLowerCase(Locale.ROOT)});
|
||||
}
|
||||
|
||||
public void testDefinitionUsingMethodReference1() throws Exception {
|
||||
|
@ -287,16 +205,16 @@ public class TestCharTokenizers extends BaseTokenStreamTestCase {
|
|||
|
||||
public void testDefinitionUsingMethodReference2() throws Exception {
|
||||
final StringReader reader = new StringReader("Tokenizer(Test)");
|
||||
final Tokenizer tokenizer = CharTokenizer.fromTokenCharPredicate(Character::isLetter, Character::toUpperCase);
|
||||
final Tokenizer tokenizer = CharTokenizer.fromTokenCharPredicate(Character::isLetter);
|
||||
tokenizer.setReader(reader);
|
||||
assertTokenStreamContents(tokenizer, new String[] { "TOKENIZER", "TEST" });
|
||||
assertTokenStreamContents(tokenizer, new String[] { "Tokenizer", "Test" });
|
||||
}
|
||||
|
||||
public void testDefinitionUsingLambda() throws Exception {
|
||||
final StringReader reader = new StringReader("Tokenizer\u00A0Test Foo");
|
||||
final Tokenizer tokenizer = CharTokenizer.fromSeparatorCharPredicate(c -> c == '\u00A0' || Character.isWhitespace(c), Character::toLowerCase);
|
||||
final Tokenizer tokenizer = CharTokenizer.fromSeparatorCharPredicate(c -> c == '\u00A0' || Character.isWhitespace(c));
|
||||
tokenizer.setReader(reader);
|
||||
assertTokenStreamContents(tokenizer, new String[] { "tokenizer", "test", "foo" });
|
||||
assertTokenStreamContents(tokenizer, new String[] { "Tokenizer", "Test", "Foo" });
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -126,20 +126,7 @@
|
|||
<analyzer class="org.apache.lucene.analysis.core.WhitespaceAnalyzer"/>
|
||||
</fieldType>
|
||||
|
||||
<fieldType name="teststop" class="solr.TextField">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.LowerCaseTokenizerFactory"/>
|
||||
<filter class="solr.ClassicFilterFactory"/>
|
||||
<filter class="solr.StopFilterFactory" words="stopwords.txt"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<!-- fieldTypes in this section isolate tokenizers and tokenfilters for testing -->
|
||||
<fieldType name="lowertok" class="solr.TextField">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.LowerCaseTokenizerFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
<fieldType name="keywordtok" class="solr.TextField">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.MockTokenizerFactory" pattern="keyword"/>
|
||||
|
@ -386,8 +373,6 @@
|
|||
<field name="test_hlt_off" type="highlittext" indexed="true"/>
|
||||
|
||||
<!-- fields to test individual tokenizers and tokenfilters -->
|
||||
<field name="teststop" type="teststop" indexed="true" stored="true"/>
|
||||
<field name="lowertok" type="lowertok" indexed="true" stored="true"/>
|
||||
<field name="keywordtok" type="keywordtok" indexed="true" stored="true"/>
|
||||
<field name="standardtok" type="standardtok" indexed="true" stored="true"/>
|
||||
<field name="HTMLstandardtok" type="HTMLstandardtok" indexed="true" stored="true"/>
|
||||
|
|
|
@ -90,19 +90,7 @@
|
|||
<analyzer class="org.apache.lucene.analysis.core.WhitespaceAnalyzer"/>
|
||||
</fieldType>
|
||||
|
||||
<fieldType name="teststop" class="solr.TextField">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.LowerCaseTokenizerFactory"/>
|
||||
<filter class="solr.StopFilterFactory" words="stopwords.txt"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<!-- fieldTypes in this section isolate tokenizers and tokenfilters for testing -->
|
||||
<fieldType name="lowertok" class="solr.TextField">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.LowerCaseTokenizerFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
<fieldType name="keywordtok" class="solr.TextField">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.MockTokenizerFactory" pattern="keyword"/>
|
||||
|
@ -347,8 +335,6 @@
|
|||
<field name="test_hlt_off" type="highlittext" indexed="true"/>
|
||||
|
||||
<!-- fields to test individual tokenizers and tokenfilters -->
|
||||
<field name="teststop" type="teststop" indexed="true" stored="true"/>
|
||||
<field name="lowertok" type="lowertok" indexed="true" stored="true"/>
|
||||
<field name="keywordtok" type="keywordtok" indexed="true" stored="true"/>
|
||||
<field name="standardtok" type="standardtok" indexed="true" stored="true"/>
|
||||
<field name="HTMLstandardtok" type="HTMLstandardtok" indexed="true" stored="true"/>
|
||||
|
|
|
@ -81,7 +81,8 @@
|
|||
|
||||
<fieldType name="text_lower_token" class="solr.TextField">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.LowerCaseTokenizerFactory"/>
|
||||
<tokenizer class="solr.LetterTokenizerFactory"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.ASCIIFoldingFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
|
|
@ -139,18 +139,7 @@
|
|||
<analyzer class="org.apache.lucene.analysis.core.WhitespaceAnalyzer"/>
|
||||
</fieldtype>
|
||||
|
||||
<fieldtype name="teststop" class="solr.TextField">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.LowerCaseTokenizerFactory"/>
|
||||
</analyzer>
|
||||
</fieldtype>
|
||||
|
||||
<!-- fieldtypes in this section isolate tokenizers and tokenfilters for testing -->
|
||||
<fieldtype name="lowertok" class="solr.TextField">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.LowerCaseTokenizerFactory"/>
|
||||
</analyzer>
|
||||
</fieldtype>
|
||||
<fieldtype name="keywordtok" class="solr.TextField">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.MockTokenizerFactory" pattern="keyword"/>
|
||||
|
@ -484,8 +473,6 @@
|
|||
termPositions="true" termOffsets="true"/>
|
||||
|
||||
<!-- fields to test individual tokenizers and tokenfilters -->
|
||||
<field name="teststop" type="teststop" indexed="true" stored="true"/>
|
||||
<field name="lowertok" type="lowertok" indexed="true" stored="true"/>
|
||||
<field name="keywordtok" type="keywordtok" indexed="true" stored="true"/>
|
||||
<field name="standardtok" type="standardtok" indexed="true" stored="true"/>
|
||||
<field name="HTMLstandardtok" type="HTMLstandardtok" indexed="true" stored="true"/>
|
||||
|
|
|
@ -73,19 +73,7 @@
|
|||
<analyzer class="org.apache.lucene.analysis.core.WhitespaceAnalyzer"/>
|
||||
</fieldType>
|
||||
|
||||
<fieldType name="teststop" class="solr.TextField">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.LowerCaseTokenizerFactory"/>
|
||||
<filter class="solr.StopFilterFactory" words="stopwords.txt"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<!-- fieldTypes in this section isolate tokenizers and tokenfilters for testing -->
|
||||
<fieldType name="lowertok" class="solr.TextField">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.LowerCaseTokenizerFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
<fieldType name="keywordtok" class="solr.TextField">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.MockTokenizerFactory" pattern="keyword"/>
|
||||
|
@ -331,8 +319,6 @@
|
|||
<field name="test_hlt_off" type="highlittext" indexed="true"/>
|
||||
|
||||
<!-- fields to test individual tokenizers and tokenfilters -->
|
||||
<field name="teststop" type="teststop" indexed="true" stored="true"/>
|
||||
<field name="lowertok" type="lowertok" indexed="true" stored="true"/>
|
||||
<field name="keywordtok" type="keywordtok" indexed="true" stored="true"/>
|
||||
<field name="standardtok" type="standardtok" indexed="true" stored="true"/>
|
||||
<field name="HTMLstandardtok" type="HTMLstandardtok" indexed="true" stored="true"/>
|
||||
|
|
|
@ -199,17 +199,13 @@
|
|||
|
||||
<fieldType name="teststop" class="solr.TextField">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.LowerCaseTokenizerFactory"/>
|
||||
<tokenizer class="solr.LetterTokenizerFactory"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.StopFilterFactory" words="stopwords.txt"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<!-- fieldTypes in this section isolate tokenizers and tokenfilters for testing -->
|
||||
<fieldType name="lowertok" class="solr.TextField">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.LowerCaseTokenizerFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
<fieldType name="keywordtok" class="solr.TextField">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.MockTokenizerFactory" pattern="keyword"/>
|
||||
|
@ -604,7 +600,6 @@
|
|||
|
||||
<!-- fields to test individual tokenizers and tokenfilters -->
|
||||
<field name="teststop" type="teststop" indexed="true" stored="true"/>
|
||||
<field name="lowertok" type="lowertok" indexed="true" stored="true"/>
|
||||
<field name="keywordtok" type="keywordtok" indexed="true" stored="true"/>
|
||||
<field name="standardtok" type="standardtok" indexed="true" stored="true"/>
|
||||
<field name="HTMLstandardtok" type="HTMLstandardtok" indexed="true" stored="true"/>
|
||||
|
|
|
@ -149,18 +149,7 @@
|
|||
<analyzer class="org.apache.lucene.analysis.core.WhitespaceAnalyzer"/>
|
||||
</fieldtype>
|
||||
|
||||
<fieldtype name="teststop" class="solr.TextField">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.LowerCaseTokenizerFactory"/>
|
||||
</analyzer>
|
||||
</fieldtype>
|
||||
|
||||
<!-- fieldtypes in this section isolate tokenizers and tokenfilters for testing -->
|
||||
<fieldtype name="lowertok" class="solr.TextField">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.LowerCaseTokenizerFactory"/>
|
||||
</analyzer>
|
||||
</fieldtype>
|
||||
<fieldtype name="keywordtok" class="solr.TextField">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.MockTokenizerFactory" pattern="keyword"/>
|
||||
|
@ -501,8 +490,6 @@
|
|||
termPositions="true" termOffsets="true"/>
|
||||
|
||||
<!-- fields to test individual tokenizers and tokenfilters -->
|
||||
<field name="teststop" type="teststop" indexed="true" stored="true"/>
|
||||
<field name="lowertok" type="lowertok" indexed="true" stored="true"/>
|
||||
<field name="keywordtok" type="keywordtok" indexed="true" stored="true"/>
|
||||
<field name="standardtok" type="standardtok" indexed="true" stored="true"/>
|
||||
<field name="HTMLstandardtok" type="HTMLstandardtok" indexed="true" stored="true"/>
|
||||
|
|
|
@ -43,7 +43,8 @@ more concise example.
|
|||
|
||||
<fieldType name="lowerCasefieldType" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="false">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.LowerCaseTokenizerFactory" maxTokenLen="3" />
|
||||
<tokenizer class="solr.LetterTokenizerFactory" maxTokenLen="3" />
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
|
@ -85,12 +86,6 @@ more concise example.
|
|||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<fieldType name="lowerCase0fieldType" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="false">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.LowerCaseTokenizerFactory" maxTokenLen="3" />
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<fieldType name="whiteSp0fieldType" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="false">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory" maxTokenLen="3" />
|
||||
|
@ -112,13 +107,11 @@ more concise example.
|
|||
<field name="id" type="string" indexed="true" stored="true" multiValued="false" required="true"/>
|
||||
|
||||
<field name="letter" type="letterfieldType" indexed="true" stored="true"/>
|
||||
<field name="lowerCase" type="lowerCasefieldType" indexed="true" stored="true"/>
|
||||
<field name="whiteSpace" type="whiteSpfieldType" indexed="true" stored="true"/>
|
||||
<field name="unicodeWhiteSpace" type="uniWhiteSpfieldType" indexed="true" stored="true"/>
|
||||
<field name="keyword" type="keywordfieldType" indexed="true" stored="true"/>
|
||||
|
||||
<field name="letter0" type="letter0fieldType" indexed="true" stored="true"/>
|
||||
<field name="lowerCase0" type="lowerCase0fieldType" indexed="true" stored="true"/>
|
||||
<field name="whiteSpace0" type="whiteSp0fieldType" indexed="true" stored="true"/>
|
||||
<field name="unicodeWhiteSpace0" type="uniWhiteSp0fieldType" indexed="true" stored="true"/>
|
||||
<field name="keyword0" type="keyword0fieldType" indexed="true" stored="true"/>
|
||||
|
|
|
@ -142,20 +142,17 @@
|
|||
|
||||
<fieldType name="teststop" class="solr.TextField">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.LowerCaseTokenizerFactory"/>
|
||||
<tokenizer class="solr.LetterTokenizerFactory"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.StopFilterFactory" words="stopwords.txt"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.LowerCaseTokenizerFactory"/>
|
||||
<tokenizer class="solr.LetterTokenizerFactory"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<!-- fieldTypes in this section isolate tokenizers and tokenfilters for testing -->
|
||||
<fieldType name="lowertok" class="solr.TextField">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.LowerCaseTokenizerFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
<fieldType name="keywordtok" class="solr.TextField">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.MockTokenizerFactory" pattern="keyword"/>
|
||||
|
@ -574,7 +571,7 @@
|
|||
|
||||
<!-- fields to test individual tokenizers and tokenfilters -->
|
||||
<field name="teststop" type="teststop" indexed="true" stored="true"/>
|
||||
<field name="lowertok" type="lowertok" indexed="true" stored="true"/>
|
||||
<field name="lowertok" type="lowerfilt" indexed="true" stored="true"/>
|
||||
<field name="keywordtok" type="keywordtok" indexed="true" stored="true"/>
|
||||
<field name="standardtok" type="standardtok" indexed="true" stored="true"/>
|
||||
<field name="HTMLstandardtok" type="HTMLstandardtok" indexed="true" stored="true"/>
|
||||
|
@ -636,7 +633,7 @@
|
|||
|
||||
<field name="store" type="location" indexed="true" stored="true" omitNorms="false"/>
|
||||
|
||||
<field name="lower" type="lowertok" indexed="false" stored="true" multiValued="true"/>
|
||||
<field name="lower" type="lowerfilt" indexed="false" stored="true" multiValued="true"/>
|
||||
<field name="_route_" type="string" indexed="true" stored="true" multiValued="false"/>
|
||||
|
||||
<field name="payloadDelimited" type="payloadDelimited"/>
|
||||
|
|
|
@ -234,17 +234,13 @@
|
|||
|
||||
<fieldType name="teststop" class="solr.TextField">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.LowerCaseTokenizerFactory"/>
|
||||
<tokenizer class="solr.LetterTokenizerFactory"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.StopFilterFactory" words="stopwords.txt"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<!-- fieldTypes in this section isolate tokenizers and tokenfilters for testing -->
|
||||
<fieldType name="lowertok" class="solr.TextField">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.LowerCaseTokenizerFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
<fieldType name="keywordtok" class="solr.TextField">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.MockTokenizerFactory" pattern="keyword"/>
|
||||
|
@ -591,7 +587,6 @@
|
|||
|
||||
<!-- fields to test individual tokenizers and tokenfilters -->
|
||||
<field name="teststop" type="teststop" indexed="true" stored="true"/>
|
||||
<field name="lowertok" type="lowertok" indexed="true" stored="true"/>
|
||||
<field name="keywordtok" type="keywordtok" indexed="true" stored="true"/>
|
||||
<field name="standardtok" type="standardtok" indexed="true" stored="true"/>
|
||||
<field name="HTMLstandardtok" type="HTMLstandardtok" indexed="true" stored="true"/>
|
||||
|
|
|
@ -163,19 +163,7 @@
|
|||
<analyzer class="org.apache.lucene.analysis.core.WhitespaceAnalyzer"/>
|
||||
</fieldType>
|
||||
|
||||
<fieldType name="teststop" class="solr.TextField">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.LowerCaseTokenizerFactory"/>
|
||||
<filter class="solr.StopFilterFactory" words="stopwords.txt"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<!-- fieldTypes in this section isolate tokenizers and tokenfilters for testing -->
|
||||
<fieldType name="lowertok" class="solr.TextField">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.LowerCaseTokenizerFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
<fieldType name="keywordtok" class="solr.TextField">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.MockTokenizerFactory" pattern="keyword"/>
|
||||
|
@ -505,8 +493,6 @@
|
|||
<field name="test_hlt_off" type="highlittext" indexed="true"/>
|
||||
|
||||
<!-- fields to test individual tokenizers and tokenfilters -->
|
||||
<field name="teststop" type="teststop" indexed="true" stored="true"/>
|
||||
<field name="lowertok" type="lowertok" indexed="true" stored="true"/>
|
||||
<field name="keywordtok" type="keywordtok" indexed="true" stored="true"/>
|
||||
<field name="standardtok" type="standardtok" indexed="true" stored="true"/>
|
||||
<field name="HTMLstandardtok" type="HTMLstandardtok" indexed="true" stored="true"/>
|
||||
|
|
|
@ -164,19 +164,7 @@
|
|||
<analyzer class="org.apache.lucene.analysis.core.WhitespaceAnalyzer"/>
|
||||
</fieldType>
|
||||
|
||||
<fieldType name="teststop" class="solr.TextField">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.LowerCaseTokenizerFactory"/>
|
||||
<filter class="solr.StopFilterFactory" words="stopwords.txt"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<!-- fieldTypes in this section isolate tokenizers and tokenfilters for testing -->
|
||||
<fieldType name="lowertok" class="solr.TextField">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.LowerCaseTokenizerFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
<fieldType name="keywordtok" class="solr.TextField">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.MockTokenizerFactory" pattern="keyword"/>
|
||||
|
@ -517,8 +505,6 @@
|
|||
<field name="test_hlt_off" type="highlittext" indexed="true"/>
|
||||
|
||||
<!-- fields to test individual tokenizers and tokenfilters -->
|
||||
<field name="teststop" type="teststop" indexed="true" stored="true"/>
|
||||
<field name="lowertok" type="lowertok" indexed="true" stored="true"/>
|
||||
<field name="keywordtok" type="keywordtok" indexed="true" stored="true"/>
|
||||
<field name="standardtok" type="standardtok" indexed="true" stored="true"/>
|
||||
<field name="HTMLstandardtok" type="HTMLstandardtok" indexed="true" stored="true"/>
|
||||
|
|
|
@ -77,11 +77,11 @@ public class TestFieldCollectionResource extends SolrRestletTestBase {
|
|||
"/fields/[0]/name=='HTMLstandardtok'",
|
||||
"/fields/[1]/name=='HTMLwhitetok'",
|
||||
"/fields/[2]/name=='_version_'",
|
||||
"/fields/[108]/name=='*_d'",
|
||||
"/fields/[107]/name=='*_f'",
|
||||
"/fields/[106]/name=='*_b'",
|
||||
"/fields/[105]/name=='*_t'",
|
||||
"/fields/[104]/name=='*_l'"
|
||||
"/fields/[107]/name=='*_d'",
|
||||
"/fields/[106]/name=='*_f'",
|
||||
"/fields/[105]/name=='*_b'",
|
||||
"/fields/[104]/name=='*_t'",
|
||||
"/fields/[103]/name=='*_l'"
|
||||
|
||||
);
|
||||
}
|
||||
|
|
|
@ -81,7 +81,8 @@ public class TestFieldTypeResource extends SolrRestletTestBase {
|
|||
"count(/response/lst[@name='fieldType']/*) = 3",
|
||||
"/response/lst[@name='fieldType']/str[@name='name'] = 'teststop'",
|
||||
"/response/lst[@name='fieldType']/str[@name='class'] = 'solr.TextField'",
|
||||
"/response/lst[@name='fieldType']/lst[@name='analyzer']/lst[@name='tokenizer']/str[@name='class'] = 'solr.LowerCaseTokenizerFactory'",
|
||||
"/response/lst[@name='fieldType']/lst[@name='analyzer']/lst[@name='tokenizer']/str[@name='class'] = 'solr.LetterTokenizerFactory'",
|
||||
"/response/lst[@name='fieldType']/lst[@name='analyzer']/arr[@name='filters']/lst/str[@name='class'][.='solr.LowerCaseFilterFactory']",
|
||||
"/response/lst[@name='fieldType']/lst[@name='analyzer']/arr[@name='filters']/lst/str[@name='class'][.='solr.StopFilterFactory']",
|
||||
"/response/lst[@name='fieldType']/lst[@name='analyzer']/arr[@name='filters']/lst/str[@name='words'][.='stopwords.txt']"
|
||||
);
|
||||
|
|
|
@ -22,7 +22,6 @@ import org.junit.BeforeClass;
|
|||
|
||||
/**
|
||||
* Tests for:
|
||||
* {@link org.apache.lucene.analysis.core.LowerCaseTokenizerFactory}
|
||||
* {@link org.apache.lucene.analysis.core.LetterTokenizerFactory}
|
||||
* {@link org.apache.lucene.analysis.core.KeywordTokenizerFactory}
|
||||
* {@link org.apache.lucene.analysis.core.WhitespaceTokenizerFactory}
|
||||
|
@ -44,25 +43,18 @@ public class TestMaxTokenLenTokenizer extends SolrTestCaseJ4 {
|
|||
// using fields with definitions, different tokenizer factories respectively at index time and standard tokenizer at query time.
|
||||
|
||||
updateJ("{\"add\":{\"doc\": {\"id\":1,\"letter\":\"letter\"}},\"commit\":{}}",null);
|
||||
updateJ("{\"add\":{\"doc\": {\"id\":2,\"lowerCase\":\"lowerCase\"}},\"commit\":{}}",null);
|
||||
updateJ("{\"add\":{\"doc\": {\"id\":3,\"whiteSpace\":\"whiteSpace in\"}},\"commit\":{}}",null);
|
||||
updateJ("{\"add\":{\"doc\": {\"id\":4,\"unicodeWhiteSpace\":\"unicode in\"}},\"commit\":{}}",null);
|
||||
updateJ("{\"add\":{\"doc\": {\"id\":5,\"keyword\":\"keyword\"}},\"commit\":{}}",null);
|
||||
|
||||
assertU(commit());
|
||||
|
||||
assertQ("Check the total number of docs", req("q","*:*"), "//result[@numFound=5]");
|
||||
assertQ("Check the total number of docs", req("q","*:*"), "//result[@numFound=4]");
|
||||
|
||||
//Tokens generated for "letter": "let" "ter" "letter" , maxTokenLen=3
|
||||
assertQ("Check the total number of docs", req("q","letter:let"), "//result[@numFound=1]");
|
||||
assertQ("Check the total number of docs", req("q","letter:lett"), "//result[@numFound=0]");
|
||||
|
||||
//Tokens generated for "lowerCase": "low" "erC" "ase" "lowerCase" , maxTokenLen=3
|
||||
assertQ("Check the total number of docs", req("q","lowerCase:low"), "//result[@numFound=1]");
|
||||
assertQ("Check the total number of docs", req("q","lowerCase:l"), "//result[@numFound=0]");
|
||||
assertQ("Check the total number of docs", req("q","lowerCase:lo"), "//result[@numFound=0]");
|
||||
assertQ("Check the total number of docs", req("q","lowerCase:lower"), "//result[@numFound=0]");
|
||||
|
||||
//Tokens generated for "whiteSpace in": "whi" "teS" "pac" "e" "in" "whiteSpace" , maxTokenLen=3
|
||||
assertQ("Check the total number of docs", req("q","whiteSpace:whi"), "//result[@numFound=1]");
|
||||
assertQ("Check the total number of docs", req("q","whiteSpace:teS"), "//result[@numFound=1]");
|
||||
|
@ -88,14 +80,13 @@ public class TestMaxTokenLenTokenizer extends SolrTestCaseJ4 {
|
|||
// using fields with definitions, same tokenizers both at index and query time.
|
||||
|
||||
updateJ("{\"add\":{\"doc\": {\"id\":1,\"letter0\":\"letter\"}},\"commit\":{}}",null);
|
||||
updateJ("{\"add\":{\"doc\": {\"id\":2,\"lowerCase0\":\"lowerCase\"}},\"commit\":{}}",null);
|
||||
updateJ("{\"add\":{\"doc\": {\"id\":3,\"whiteSpace0\":\"whiteSpace in\"}},\"commit\":{}}",null);
|
||||
updateJ("{\"add\":{\"doc\": {\"id\":4,\"unicodeWhiteSpace0\":\"unicode in\"}},\"commit\":{}}",null);
|
||||
updateJ("{\"add\":{\"doc\": {\"id\":5,\"keyword0\":\"keyword\"}},\"commit\":{}}",null);
|
||||
|
||||
assertU(commit());
|
||||
|
||||
assertQ("Check the total number of docs", req("q","*:*"), "//result[@numFound=5]");
|
||||
assertQ("Check the total number of docs", req("q","*:*"), "//result[@numFound=4]");
|
||||
|
||||
//Tokens generated for "letter": "let" "ter" "letter" , maxTokenLen=3
|
||||
// Anything that matches the first three letters should be found when maxLen=3
|
||||
|
@ -104,13 +95,6 @@ public class TestMaxTokenLenTokenizer extends SolrTestCaseJ4 {
|
|||
assertQ("Check the total number of docs", req("q","letter0:lett"), "//result[@numFound=1]");
|
||||
assertQ("Check the total number of docs", req("q","letter0:letXYZ"), "//result[@numFound=1]");
|
||||
|
||||
//Tokens generated for "lowerCase": "low" "erC" "ase" "lowerCase" , maxTokenLen=3
|
||||
// Anything that matches the first three letters should be found when maxLen=3
|
||||
assertQ("Check the total number of docs", req("q","lowerCase0:low"), "//result[@numFound=1]");
|
||||
assertQ("Check the total number of docs", req("q","lowerCase0:l"), "//result[@numFound=0]");
|
||||
assertQ("Check the total number of docs", req("q","lowerCase0:lo"), "//result[@numFound=0]");
|
||||
assertQ("Check the total number of docs", req("q","lowerCase0:lowerXYZ"), "//result[@numFound=1]");
|
||||
|
||||
//Tokens generated for "whiteSpace in": "whi" "teS" "pac" "e" "in" "whiteSpace" , maxTokenLen=3
|
||||
// Anything that matches the first three letters should be found when maxLen=3
|
||||
assertQ("Check the total number of docs", req("q","whiteSpace0:h"), "//result[@numFound=0]");
|
||||
|
|
|
@ -141,18 +141,7 @@
|
|||
<analyzer class="org.apache.lucene.analysis.core.WhitespaceAnalyzer"/>
|
||||
</fieldtype>
|
||||
|
||||
<fieldtype name="teststop" class="solr.TextField">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.LowerCaseTokenizerFactory"/>
|
||||
</analyzer>
|
||||
</fieldtype>
|
||||
|
||||
<!-- fieldtypes in this section isolate tokenizers and tokenfilters for testing -->
|
||||
<fieldtype name="lowertok" class="solr.TextField">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.LowerCaseTokenizerFactory"/>
|
||||
</analyzer>
|
||||
</fieldtype>
|
||||
<fieldtype name="keywordtok" class="solr.TextField">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.MockTokenizerFactory" pattern="keyword"/>
|
||||
|
@ -493,8 +482,6 @@
|
|||
termPositions="true" termOffsets="true"/>
|
||||
|
||||
<!-- fields to test individual tokenizers and tokenfilters -->
|
||||
<field name="teststop" type="teststop" indexed="true" stored="true"/>
|
||||
<field name="lowertok" type="lowertok" indexed="true" stored="true"/>
|
||||
<field name="keywordtok" type="keywordtok" indexed="true" stored="true"/>
|
||||
<field name="standardtok" type="standardtok" indexed="true" stored="true"/>
|
||||
<field name="HTMLstandardtok" type="HTMLstandardtok" indexed="true" stored="true"/>
|
||||
|
|
|
@ -116,18 +116,7 @@
|
|||
<analyzer class="org.apache.lucene.analysis.core.WhitespaceAnalyzer"/>
|
||||
</fieldType>
|
||||
|
||||
<fieldType name="teststop" class="solr.TextField">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.LowerCaseTokenizerFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<!-- fieldTypes in this section isolate tokenizers and tokenfilters for testing -->
|
||||
<fieldType name="lowertok" class="solr.TextField">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.LowerCaseTokenizerFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
<fieldType name="keywordtok" class="solr.TextField">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.MockTokenizerFactory" pattern="keyword"/>
|
||||
|
@ -461,8 +450,6 @@
|
|||
termPositions="true" termOffsets="true"/>
|
||||
|
||||
<!-- fields to test individual tokenizers and tokenfilters -->
|
||||
<field name="teststop" type="teststop" indexed="true" stored="true"/>
|
||||
<field name="lowertok" type="lowertok" indexed="true" stored="true"/>
|
||||
<field name="keywordtok" type="keywordtok" indexed="true" stored="true"/>
|
||||
<field name="standardtok" type="standardtok" indexed="true" stored="true"/>
|
||||
<field name="HTMLstandardtok" type="HTMLstandardtok" indexed="true" stored="true"/>
|
||||
|
|
|
@ -137,16 +137,7 @@
|
|||
<analyzer class="org.apache.lucene.analysis.core.WhitespaceAnalyzer"/>
|
||||
</fieldtype>
|
||||
|
||||
<fieldtype name="teststop" class="solr.TextField">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.LowerCaseTokenizerFactory"/>
|
||||
</analyzer>
|
||||
</fieldtype>
|
||||
|
||||
<!-- fieldtypes in this section isolate tokenizers and tokenfilters for testing -->
|
||||
<fieldtype name="lowertok" class="solr.TextField">
|
||||
<analyzer><tokenizer class="solr.LowerCaseTokenizerFactory"/></analyzer>
|
||||
</fieldtype>
|
||||
<fieldtype name="keywordtok" class="solr.TextField">
|
||||
<analyzer><tokenizer class="solr.MockTokenizerFactory" pattern="keyword"/></analyzer>
|
||||
</fieldtype>
|
||||
|
@ -479,8 +470,6 @@
|
|||
termPositions="true" termOffsets="true"/>
|
||||
|
||||
<!-- fields to test individual tokenizers and tokenfilters -->
|
||||
<field name="teststop" type="teststop" indexed="true" stored="true"/>
|
||||
<field name="lowertok" type="lowertok" indexed="true" stored="true"/>
|
||||
<field name="keywordtok" type="keywordtok" indexed="true" stored="true"/>
|
||||
<field name="standardtok" type="standardtok" indexed="true" stored="true"/>
|
||||
<field name="HTMLstandardtok" type="HTMLstandardtok" indexed="true" stored="true"/>
|
||||
|
|