mirror of https://github.com/apache/lucene.git
LUCENE-3410: move changes to 3.5 and nuke deprecated code in trunk
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1166770 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
b265d499f2
commit
128aaf8387
|
@ -34,10 +34,6 @@ API Changes
|
||||||
|
|
||||||
* LUCENE-3400: Removed DutchAnalyzer.setStemDictionary (Chris Male)
|
* LUCENE-3400: Removed DutchAnalyzer.setStemDictionary (Chris Male)
|
||||||
|
|
||||||
* LUCENE-3410: Deprecated the WordDelimterFilter constructors accepting multiple
|
|
||||||
ints masquerading as booleans. Preferred constructor now accepts a single int
|
|
||||||
bitfield (Chris Male)
|
|
||||||
|
|
||||||
New Features
|
New Features
|
||||||
|
|
||||||
* LUCENE-2341: A new analyzer/ filter: Morfologik - a dictionary-driven lemmatizer
|
* LUCENE-2341: A new analyzer/ filter: Morfologik - a dictionary-driven lemmatizer
|
||||||
|
|
|
@ -202,98 +202,6 @@ public final class WordDelimiterFilter extends TokenFilter {
|
||||||
this(in, WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE, configurationFlags, protWords);
|
this(in, WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE, configurationFlags, protWords);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* @param in Token stream to be filtered.
|
|
||||||
* @param charTypeTable table containing character types
|
|
||||||
* @param generateWordParts If 1, causes parts of words to be generated: "PowerShot" => "Power" "Shot"
|
|
||||||
* @param generateNumberParts If 1, causes number subwords to be generated: "500-42" => "500" "42"
|
|
||||||
* @param catenateWords 1, causes maximum runs of word parts to be catenated: "wi-fi" => "wifi"
|
|
||||||
* @param catenateNumbers If 1, causes maximum runs of number parts to be catenated: "500-42" => "50042"
|
|
||||||
* @param catenateAll If 1, causes all subword parts to be catenated: "wi-fi-4000" => "wifi4000"
|
|
||||||
* @param splitOnCaseChange 1, causes "PowerShot" to be two tokens; ("Power-Shot" remains two parts regards)
|
|
||||||
* @param preserveOriginal If 1, includes original words in subwords: "500-42" => "500" "42" "500-42"
|
|
||||||
* @param splitOnNumerics 1, causes "j2se" to be three tokens; "j" "2" "se"
|
|
||||||
* @param stemEnglishPossessive If 1, causes trailing "'s" to be removed for each subword: "O'Neil's" => "O", "Neil"
|
|
||||||
* @param protWords If not null is the set of tokens to protect from being delimited
|
|
||||||
* @deprecated Use {@link #WordDelimiterFilter(TokenStream, byte[], int, CharArraySet)}
|
|
||||||
*/
|
|
||||||
@Deprecated
|
|
||||||
public WordDelimiterFilter(TokenStream in,
|
|
||||||
byte[] charTypeTable,
|
|
||||||
int generateWordParts,
|
|
||||||
int generateNumberParts,
|
|
||||||
int catenateWords,
|
|
||||||
int catenateNumbers,
|
|
||||||
int catenateAll,
|
|
||||||
int splitOnCaseChange,
|
|
||||||
int preserveOriginal,
|
|
||||||
int splitOnNumerics,
|
|
||||||
int stemEnglishPossessive,
|
|
||||||
CharArraySet protWords) {
|
|
||||||
super(in);
|
|
||||||
|
|
||||||
int flags = 0;
|
|
||||||
if (generateWordParts != 0) {
|
|
||||||
flags |= GENERATE_WORD_PARTS;
|
|
||||||
}
|
|
||||||
if (generateNumberParts != 0) {
|
|
||||||
flags |= GENERATE_NUMBER_PARTS;
|
|
||||||
}
|
|
||||||
if (catenateWords != 0) {
|
|
||||||
flags |= CATENATE_WORDS;
|
|
||||||
}
|
|
||||||
if (catenateNumbers != 0) {
|
|
||||||
flags |= CATENATE_NUMBERS;
|
|
||||||
}
|
|
||||||
if (catenateAll != 0) {
|
|
||||||
flags |= CATENATE_ALL;
|
|
||||||
}
|
|
||||||
if (preserveOriginal != 0) {
|
|
||||||
flags |= PRESERVE_ORIGINAL;
|
|
||||||
}
|
|
||||||
if (splitOnCaseChange != 0) {
|
|
||||||
flags |= SPLIT_ON_CASE_CHANGE;
|
|
||||||
}
|
|
||||||
if (splitOnNumerics != 0) {
|
|
||||||
flags |= SPLIT_ON_NUMERICS;
|
|
||||||
}
|
|
||||||
if (stemEnglishPossessive != 0) {
|
|
||||||
flags |= STEM_ENGLISH_POSSESSIVE;
|
|
||||||
}
|
|
||||||
this.protWords = protWords;
|
|
||||||
this.iterator = new WordDelimiterIterator(charTypeTable, splitOnCaseChange != 0, splitOnNumerics != 0, stemEnglishPossessive != 0);
|
|
||||||
this.flags = flags;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @param in Token stream to be filtered.
|
|
||||||
* @param generateWordParts If 1, causes parts of words to be generated: "PowerShot", "Power-Shot" => "Power" "Shot"
|
|
||||||
* @param generateNumberParts If 1, causes number subwords to be generated: "500-42" => "500" "42"
|
|
||||||
* @param catenateWords 1, causes maximum runs of word parts to be catenated: "wi-fi" => "wifi"
|
|
||||||
* @param catenateNumbers If 1, causes maximum runs of number parts to be catenated: "500-42" => "50042"
|
|
||||||
* @param catenateAll If 1, causes all subword parts to be catenated: "wi-fi-4000" => "wifi4000"
|
|
||||||
* @param splitOnCaseChange 1, causes "PowerShot" to be two tokens; ("Power-Shot" remains two parts regards)
|
|
||||||
* @param preserveOriginal If 1, includes original words in subwords: "500-42" => "500" "42" "500-42"
|
|
||||||
* @param splitOnNumerics 1, causes "j2se" to be three tokens; "j" "2" "se"
|
|
||||||
* @param stemEnglishPossessive If 1, causes trailing "'s" to be removed for each subword: "O'Neil's" => "O", "Neil"
|
|
||||||
* @param protWords If not null is the set of tokens to protect from being delimited
|
|
||||||
* @deprecated Use {@link #WordDelimiterFilter(TokenStream, int, CharArraySet)}
|
|
||||||
*/
|
|
||||||
@Deprecated
|
|
||||||
public WordDelimiterFilter(TokenStream in,
|
|
||||||
int generateWordParts,
|
|
||||||
int generateNumberParts,
|
|
||||||
int catenateWords,
|
|
||||||
int catenateNumbers,
|
|
||||||
int catenateAll,
|
|
||||||
int splitOnCaseChange,
|
|
||||||
int preserveOriginal,
|
|
||||||
int splitOnNumerics,
|
|
||||||
int stemEnglishPossessive,
|
|
||||||
CharArraySet protWords) {
|
|
||||||
this(in, WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE, generateWordParts, generateNumberParts, catenateWords, catenateNumbers, catenateAll, splitOnCaseChange, preserveOriginal, splitOnNumerics, stemEnglishPossessive, protWords);
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean incrementToken() throws IOException {
|
public boolean incrementToken() throws IOException {
|
||||||
while (true) {
|
while (true) {
|
||||||
if (!hasSavedState) {
|
if (!hasSavedState) {
|
||||||
|
|
|
@ -321,7 +321,12 @@ Documentation
|
||||||
|
|
||||||
================== 3.5.0 ==================
|
================== 3.5.0 ==================
|
||||||
|
|
||||||
(No Changes)
|
Other Changes
|
||||||
|
----------------------
|
||||||
|
|
||||||
|
* LUCENE-3410: Deprecated the WordDelimiterFilter constructors accepting multiple
|
||||||
|
ints masquerading as booleans. Preferred constructor now accepts a single int
|
||||||
|
bitfield (Chris Male)
|
||||||
|
|
||||||
================== 3.4.0 ==================
|
================== 3.4.0 ==================
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue