mirror of https://github.com/apache/lucene.git

commit 163f926c44 (parent 9de4ebb4fe)

exception if both generate and catenate==0: SOLR-34

git-svn-id: https://svn.apache.org/repos/asf/incubator/solr/trunk@423235 13f79535-47bb-0310-9956-ffa450edef68
CHANGES.txt
@@ -61,6 +61,8 @@ Bug Fixes
    (Rob Staveley, yonik)

 6. Worked around a Jetty bug that caused invalid XML responses for fields
    containing non ASCII chars. (Bertrand Delacretaz via yonik, SOLR-32)

+7. WordDelimiterFilter can throw exceptions if configured with both
+   generate and catenate off. (Mike Klaas via yonik, SOLR-34)

 Other Changes

 1. Upgrade to Lucene 2.0 nightly build 2006-06-22, lucene SVN revision 416224,
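In brief (SOLR-34): with the generate and catenate options all turned off, WordDelimiterFilter could produce no subtokens at all for an input token, leaving its internal queue empty; the unconditional break statements below then led to a read of that empty queue and an ArrayIndexOutOfBounds error. The hunks that follow guard each break on queue.size().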
WordDelimiterFilter.java
@@ -329,16 +329,16 @@ final class WordDelimiterFilter extends TokenFilter {
       if (numWords==0) {
         // all numbers
         addCombos(tlist,0,numtok,generateNumberParts!=0,catenateNumbers!=0 || catenateAll!=0, 1);
-        break;
+        if (queue.size() > 0) break; else continue;
       } else if (numNumbers==0) {
         // all words
         addCombos(tlist,0,numtok,generateWordParts!=0,catenateWords!=0 || catenateAll!=0, 1);
-        break;
+        if (queue.size() > 0) break; else continue;
       } else if (generateNumberParts==0 && generateWordParts==0 && catenateNumbers==0 && catenateWords==0) {
         // catenate all *only*
         // OPT:could be optimized to add to current queue...
         addCombos(tlist,0,numtok,false,catenateAll!=0, 1);
-        break;
+        if (queue.size() > 0) break; else continue;
       }

       //
@@ -369,7 +369,10 @@ final class WordDelimiterFilter extends TokenFilter {
         addCombos(tlist,0,numtok,false,true,0);
       }

-      break;
+      // NOTE: in certain cases, queue may be empty (for instance, if catenate
+      // and generate are both set to false). In this case, we should proceed
+      // to the next token rather than throwing ArrayIndexOutOfBounds.
+      if (queue.size() > 0) break; else continue;
     }

     // System.out.println("##########AFTER COMBINATIONS:"+ str(queue));
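As a minimal, self-contained sketch of the control flow fixed above (hypothetical stand-in types and a toy split() rule, not Solr's actual classes): when subword generation yields nothing for a token, the loop should advance to the next input token rather than read from an empty queue.

import java.util.ArrayDeque;
import java.util.Arrays;
import java.util.Deque;
import java.util.List;

public class EmptyQueueGuardSketch {
  // Toy stand-in for subword generation: with generate/catenate all off,
  // an all-numbers token such as "123.123.123.123" yields nothing.
  static List<String> split(String tok) {
    return tok.matches("[0-9.]+") ? Arrays.<String>asList()
                                  : Arrays.asList(tok.split("-"));
  }

  public static void main(String[] args) {
    for (String tok : Arrays.asList("123.123.123.123", "wi-fi")) {
      Deque<String> queue = new ArrayDeque<String>(split(tok));
      // The guard added in this commit: skip rather than read an empty queue.
      if (queue.isEmpty()) continue;
      System.out.println(tok + " -> " + queue.removeFirst());
    }
  }
}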
TestSynonymFilter.java
@@ -1,284 +1,284 @@
/**
 * Copyright 2006 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.solr.analysis;

import junit.framework.TestCase;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;

/**
 * @author yonik
 * @version $Id$
 */
public class TestSynonymFilter extends TestCase {

  public List strings(String str) {
    String[] arr = str.split(" ");
    return Arrays.asList(arr);
  }

  /***
   * Return a list of tokens according to a test string format:
   * a b c   => returns List<Token> [a,b,c]
   * a/b     => tokens a and b share the same spot (b.positionIncrement=0)
   * a,3/b/c => a,b,c all share same position (a.positionIncrement=3, b.positionIncrement=0, c.positionIncrement=0)
   */
  public List tokens(String str) {
    String[] arr = str.split(" ");
    List result = new ArrayList();
    for (int i=0; i<arr.length; i++) {
      String[] toks = arr[i].split("/");
      String[] params = toks[0].split(",");
      Token t = new Token(params[0],0,0,"TEST");
      if (params.length > 1) t.setPositionIncrement(Integer.parseInt(params[1]));
      result.add(t);
      for (int j=1; j<toks.length; j++) {
        t = new Token(toks[j],0,0,"TEST");
        t.setPositionIncrement(0);
        result.add(t);
      }
    }
    return result;
  }

  public List getTokList(SynonymMap dict, String input, boolean includeOrig) throws IOException {
    ArrayList lst = new ArrayList();
    final List toks = tokens(input);
    TokenStream ts = new TokenStream() {
      Iterator iter = toks.iterator();
      public Token next() throws IOException {
        return iter.hasNext() ? (Token)iter.next() : null;
      }
    };

    SynonymFilter sf = new SynonymFilter(ts, dict, true);

    while(true) {
      Token t = sf.next();
      if (t==null) return lst;
      lst.add(t);
    }
  }

  public List tok2str(List tokLst) {
    ArrayList lst = new ArrayList();
    for (Iterator iter = tokLst.iterator(); iter.hasNext();) {
      lst.add(((Token)(iter.next())).termText());
    }
    return lst;
  }


  public void assertTokEqual(List a, List b) {
    assertTokEq(a,b);
    assertTokEq(b,a);
  }

  private void assertTokEq(List a, List b) {
    int pos=0;
    for (Iterator iter = a.iterator(); iter.hasNext();) {
      Token tok = (Token)iter.next();
      pos += tok.getPositionIncrement();
      if (!tokAt(b, tok.termText(), pos)) {
        fail(a + "!=" + b);
      }
    }
  }

  public boolean tokAt(List lst, String val, int tokPos) {
    int pos=0;
    for (Iterator iter = lst.iterator(); iter.hasNext();) {
      Token tok = (Token)iter.next();
      pos += tok.getPositionIncrement();
      if (pos==tokPos && tok.termText().equals(val)) return true;
    }
    return false;
  }


  public void testMatching() throws IOException {
    SynonymMap map = new SynonymMap();

    boolean orig = false;
    boolean merge = true;
    map.add(strings("a b"), tokens("ab"), orig, merge);
    map.add(strings("a c"), tokens("ac"), orig, merge);
    map.add(strings("a"), tokens("aa"), orig, merge);
    map.add(strings("b"), tokens("bb"), orig, merge);
    map.add(strings("z x c v"), tokens("zxcv"), orig, merge);
    map.add(strings("x c"), tokens("xc"), orig, merge);

    // System.out.println(map);
    // System.out.println(getTokList(map,"a",false));

    assertTokEqual(getTokList(map,"$",false), tokens("$"));
    assertTokEqual(getTokList(map,"a",false), tokens("aa"));
    assertTokEqual(getTokList(map,"a $",false), tokens("aa $"));
    assertTokEqual(getTokList(map,"$ a",false), tokens("$ aa"));
    assertTokEqual(getTokList(map,"a a",false), tokens("aa aa"));
    assertTokEqual(getTokList(map,"b",false), tokens("bb"));
    assertTokEqual(getTokList(map,"z x c v",false), tokens("zxcv"));
    assertTokEqual(getTokList(map,"z x c $",false), tokens("z xc $"));

    // repeats
    map.add(strings("a b"), tokens("ab"), orig, merge);
    map.add(strings("a b"), tokens("ab"), orig, merge);
    assertTokEqual(getTokList(map,"a b",false), tokens("ab"));

    // check for lack of recursion
    map.add(strings("zoo"), tokens("zoo"), orig, merge);
    assertTokEqual(getTokList(map,"zoo zoo $ zoo",false), tokens("zoo zoo $ zoo"));
    map.add(strings("zoo"), tokens("zoo zoo"), orig, merge);
    assertTokEqual(getTokList(map,"zoo zoo $ zoo",false), tokens("zoo zoo zoo zoo $ zoo zoo"));
  }

  public void testIncludeOrig() throws IOException {
    SynonymMap map = new SynonymMap();

    boolean orig = true;
    boolean merge = true;
    map.add(strings("a b"), tokens("ab"), orig, merge);
    map.add(strings("a c"), tokens("ac"), orig, merge);
    map.add(strings("a"), tokens("aa"), orig, merge);
    map.add(strings("b"), tokens("bb"), orig, merge);
    map.add(strings("z x c v"), tokens("zxcv"), orig, merge);
    map.add(strings("x c"), tokens("xc"), orig, merge);

    // System.out.println(map);
    // System.out.println(getTokList(map,"a",false));

    assertTokEqual(getTokList(map,"$",false), tokens("$"));
    assertTokEqual(getTokList(map,"a",false), tokens("a/aa"));
    assertTokEqual(getTokList(map,"a",false), tokens("a/aa"));
    assertTokEqual(getTokList(map,"$ a",false), tokens("$ a/aa"));
    assertTokEqual(getTokList(map,"a $",false), tokens("a/aa $"));
    assertTokEqual(getTokList(map,"$ a !",false), tokens("$ a/aa !"));
    assertTokEqual(getTokList(map,"a a",false), tokens("a/aa a/aa"));
    assertTokEqual(getTokList(map,"b",false), tokens("b/bb"));
    assertTokEqual(getTokList(map,"z x c v",false), tokens("z/zxcv x c v"));
    assertTokEqual(getTokList(map,"z x c $",false), tokens("z x/xc c $"));

    // check for lack of recursion
    map.add(strings("zoo zoo"), tokens("zoo"), orig, merge);
    assertTokEqual(getTokList(map,"zoo zoo $ zoo",false), tokens("zoo/zoo zoo/zoo $ zoo/zoo"));
    map.add(strings("zoo"), tokens("zoo zoo"), orig, merge);
    assertTokEqual(getTokList(map,"zoo zoo $ zoo",false), tokens("zoo/zoo zoo $ zoo/zoo zoo"));
  }


  public void testMapMerge() throws IOException {
    SynonymMap map = new SynonymMap();

    boolean orig = false;
    boolean merge = true;
    map.add(strings("a"), tokens("a5,5"), orig, merge);
    map.add(strings("a"), tokens("a3,3"), orig, merge);
    // System.out.println(map);
    assertTokEqual(getTokList(map,"a",false), tokens("a3 a5,2"));

    map.add(strings("b"), tokens("b3,3"), orig, merge);
    map.add(strings("b"), tokens("b5,5"), orig, merge);
    //System.out.println(map);
    assertTokEqual(getTokList(map,"b",false), tokens("b3 b5,2"));


    map.add(strings("a"), tokens("A3,3"), orig, merge);
    map.add(strings("a"), tokens("A5,5"), orig, merge);
    assertTokEqual(getTokList(map,"a",false), tokens("a3/A3 a5,2/A5"));

    map.add(strings("a"), tokens("a1"), orig, merge);
    assertTokEqual(getTokList(map,"a",false), tokens("a1 a3,2/A3 a5,2/A5"));

    map.add(strings("a"), tokens("a2,2"), orig, merge);
    map.add(strings("a"), tokens("a4,4 a6,2"), orig, merge);
    assertTokEqual(getTokList(map,"a",false), tokens("a1 a2 a3/A3 a4 a5/A5 a6"));
  }


  public void testOverlap() throws IOException {
    SynonymMap map = new SynonymMap();

    boolean orig = false;
    boolean merge = true;
    map.add(strings("qwe"), tokens("qq/ww/ee"), orig, merge);
    map.add(strings("qwe"), tokens("xx"), orig, merge);
    map.add(strings("qwe"), tokens("yy"), orig, merge);
    map.add(strings("qwe"), tokens("zz"), orig, merge);
    assertTokEqual(getTokList(map,"$",false), tokens("$"));
    assertTokEqual(getTokList(map,"qwe",false), tokens("qq/ww/ee/xx/yy/zz"));

    // test merging within the map

    map.add(strings("a"), tokens("a5,5 a8,3 a10,2"), orig, merge);
    map.add(strings("a"), tokens("a3,3 a7,4 a9,2 a11,2 a111,100"), orig, merge);
    assertTokEqual(getTokList(map,"a",false), tokens("a3 a5,2 a7,2 a8 a9 a10 a11 a111,100"));
  }

  public void testOffsets() throws IOException {
    SynonymMap map = new SynonymMap();

    boolean orig = false;
    boolean merge = true;

    // test that generated tokens start at the same offset as the original
    map.add(strings("a"), tokens("aa"), orig, merge);
    assertTokEqual(getTokList(map,"a,5",false), tokens("aa,5"));
    assertTokEqual(getTokList(map,"a,0",false), tokens("aa,0"));

    // test that offset of first replacement is ignored (always takes the orig offset)
    map.add(strings("b"), tokens("bb,100"), orig, merge);
    assertTokEqual(getTokList(map,"b,5",false), tokens("bb,5"));
    assertTokEqual(getTokList(map,"b,0",false), tokens("bb,0"));

    // test that subsequent tokens are adjusted accordingly
    map.add(strings("c"), tokens("cc,100 c2,2"), orig, merge);
    assertTokEqual(getTokList(map,"c,5",false), tokens("cc,5 c2,2"));
    assertTokEqual(getTokList(map,"c,0",false), tokens("cc,0 c2,2"));

  }


  public void testOffsetsWithOrig() throws IOException {
    SynonymMap map = new SynonymMap();

    boolean orig = true;
    boolean merge = true;

    // test that generated tokens start at the same offset as the original
    map.add(strings("a"), tokens("aa"), orig, merge);
    assertTokEqual(getTokList(map,"a,5",false), tokens("a,5/aa"));
    assertTokEqual(getTokList(map,"a,0",false), tokens("a,0/aa"));

    // test that offset of first replacement is ignored (always takes the orig offset)
    map.add(strings("b"), tokens("bb,100"), orig, merge);
    assertTokEqual(getTokList(map,"b,5",false), tokens("bb,5/b"));
    assertTokEqual(getTokList(map,"b,0",false), tokens("bb,0/b"));

    // test that subsequent tokens are adjusted accordingly
    map.add(strings("c"), tokens("cc,100 c2,2"), orig, merge);
    assertTokEqual(getTokList(map,"c,5",false), tokens("cc,5/c c2,2"));
    assertTokEqual(getTokList(map,"c,0",false), tokens("cc,0/c c2,2"));
  }


}
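The tokens(...) helper above defines a compact test notation for token positions. As a standalone illustration (not part of the commit), the following decodes the same mini-format into (term, positionIncrement) pairs the way tokens() does:

public class TokenFormatDemo {
  public static void main(String[] args) {
    String str = "a b,2/c d";               // sample input in the mini-format
    for (String word : str.split(" ")) {
      String[] toks = word.split("/");      // tokens sharing one position
      String[] params = toks[0].split(","); // optional ",N" increment on the head
      int inc = params.length > 1 ? Integer.parseInt(params[1]) : 1;
      System.out.println(params[0] + " posInc=" + inc);
      for (int j = 1; j < toks.length; j++) {
        System.out.println(toks[j] + " posInc=0"); // stacked at the same position
      }
    }
    // prints: a posInc=1, b posInc=2, c posInc=0, d posInc=1
  }
}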
TestWordDelimiterFilter.java
@@ -1,49 +1,53 @@
 package org.apache.solr.analysis;

 import org.apache.solr.util.AbstractSolrTestCase;
 import org.apache.solr.util.TestHarness;
 import org.apache.solr.request.SolrQueryRequest;

 /**
  * New WordDelimiterFilter tests... most of the tests are in ConvertedLegacyTest
  */
 public class TestWordDelimiterFilter extends AbstractSolrTestCase {
   public String getSchemaFile() { return "solr/conf/schema.xml"; }
   public String getSolrConfigFile() { return "solr/conf/solrconfig.xml"; }


   public void posTst(String v1, String v2, String s1, String s2) {
     assertU(adoc("id", "42",
                  "subword", v1,
                  "subword", v2));
     assertU(commit());

     // there is a positionIncrementGap of 100 between field values, so
     // we test if that was maintained.
     assertQ("position increment lost",
             req("+id:42 +subword:\"" + s1 + ' ' + s2 + "\"~90")
             ,"//result[@numFound=0]"
     );
     assertQ("position increment lost",
             req("+id:42 +subword:\"" + s1 + ' ' + s2 + "\"~110")
             ,"//result[@numFound=1]"
     );
   }


   public void testRetainPositionIncrement() {
     posTst("foo","bar","foo","bar");
     posTst("-foo-","-bar-","foo","bar");
     posTst("foo","bar","-foo-","-bar-");

     posTst("123","456","123","456");
     posTst("/123/","/456/","123","456");

     posTst("/123/abc","qwe/456/","abc","qwe");

     posTst("zoo-foo","bar-baz","foo","bar");
     posTst("zoo-foo-123","456-bar-baz","foo","bar");
   }

+  public void testNoGenerationEdgeCase() {
+    assertU(adoc("id", "222", "numberpartfail", "123.123.123.123"));
+  }
+
 }
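The new testNoGenerationEdgeCase exercises the fix end to end: it indexes 123.123.123.123 into the numberpartfail field, whose failtype1 analyzer (added to the test schema below) enables only generateWordParts, so the all-numbers token generates no parts and no catenations. Before the guard above, adding that document failed with an ArrayIndexOutOfBounds error instead of simply emitting no subword tokens.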
solr/conf/schema.xml
@@ -52,6 +52,15 @@
   <fieldtype name="bcdlong" class="solr.BCDLongField" sortMissingLast="true"/>
   <fieldtype name="bcdstr" class="solr.BCDStrField" sortMissingLast="true"/>

+  <!-- Field type demonstrating an Analyzer failure -->
+  <fieldtype name="failtype1" class="solr.TextField">
+    <analyzer type="index">
+      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+      <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0" catenateNumbers="0" catenateAll="0"/>
+      <filter class="solr.LowerCaseFilterFactory"/>
+    </analyzer>
+  </fieldtype>
+
   <fieldtype name="boolean" class="solr.BoolField" sortMissingLast="true"/>
   <fieldtype name="string" class="solr.StrField" sortMissingLast="true"/>
@@ -311,6 +320,8 @@
   <field name="lengthfilt" type="lengthfilt" indexed="true" stored="true"/>
   <field name="dedup" type="dedup" indexed="true" stored="true"/>

+  <field name="numberpartfail" type="failtype1" indexed="true" stored="true"/>
+
   <field name="nullfirst" type="string" indexed="true" stored="true" sortMissingFirst="true"/>

   <field name="subword" type="subword" indexed="true" stored="true"/>