mirror of https://github.com/apache/lucene.git
exception if both generate and catenate==0: SOLR-34
git-svn-id: https://svn.apache.org/repos/asf/incubator/solr/trunk@423235 13f79535-47bb-0310-9956-ffa450edef68
parent 9de4ebb4fe
commit 163f926c44
@@ -61,6 +61,8 @@ Bug Fixes
     (Rob Staveley, yonik)
 6. Worked around a Jetty bug that caused invalid XML responses for fields
    containing non ASCII chars. (Bertrand Delacretaz via yonik, SOLR-32)
+7. WordDelimiterFilter can throw exceptions if configured with both
+   generate and catenate off. (Mike Klaas via yonik, SOLR-34)
 
 Other Changes
 1. Upgrade to Lucene 2.0 nightly build 2006-06-22, lucene SVN revision 416224,
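For context, the SOLR-34 failure mode needs both kinds of options off at once: when no generate* or catenate* flag applies to a token, the filter produces no sub-tokens for it at all. A minimal standalone sketch of that condition (the splitting and flag handling below are simplified, hypothetical stand-ins, not WordDelimiterFilter's actual implementation):

    import java.util.ArrayList;
    import java.util.List;

    // Hypothetical sketch: a pure-number token with generateNumberParts off and
    // every catenate option off yields an empty output list.
    class NoGenerationSketch {
        static List<String> subwords(String token, boolean generateNumberParts,
                                     boolean catenateNumbers, boolean catenateAll) {
            List<String> out = new ArrayList<String>();
            String[] parts = token.split("[^A-Za-z0-9]+"); // split on delimiters
            if (generateNumberParts) {
                for (String p : parts) out.add(p);         // emit each part
            }
            if (catenateNumbers || catenateAll) {
                StringBuilder cat = new StringBuilder();   // emit the joined form
                for (String p : parts) cat.append(p);
                out.add(cat.toString());
            }
            return out; // empty when all three flags are off
        }

        public static void main(String[] args) {
            // prints []: nothing is emitted for the kind of token that used to crash
            System.out.println(subwords("123.123.123.123", false, false, false));
        }
    }

The diff below shows where that empty result used to hurt: the filter broke out of its main loop and then read from an output queue that held nothing.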
@@ -329,16 +329,16 @@ final class WordDelimiterFilter extends TokenFilter {
         if (numWords==0) {
           // all numbers
           addCombos(tlist,0,numtok,generateNumberParts!=0,catenateNumbers!=0 || catenateAll!=0, 1);
-          break;
+          if (queue.size() > 0) break; else continue;
         } else if (numNumbers==0) {
           // all words
           addCombos(tlist,0,numtok,generateWordParts!=0,catenateWords!=0 || catenateAll!=0, 1);
-          break;
+          if (queue.size() > 0) break; else continue;
         } else if (generateNumberParts==0 && generateWordParts==0 && catenateNumbers==0 && catenateWords==0) {
           // catenate all *only*
           // OPT:could be optimized to add to current queue...
           addCombos(tlist,0,numtok,false,catenateAll!=0, 1);
-          break;
+          if (queue.size() > 0) break; else continue;
         }
 
         //
@@ -369,7 +369,10 @@ final class WordDelimiterFilter extends TokenFilter {
         addCombos(tlist,0,numtok,false,true,0);
       }
 
-      break;
+      // NOTE: in certain cases, queue may be empty (for instance, if catenate
+      // and generate are both set to false). In this case, we should proceed
+      // to next token rather than throwing ArrayOutOfBounds
+      if (queue.size() > 0) break; else continue;
     }
 
     // System.out.println("##########AFTER COMBINATIONS:"+ str(queue));
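The shape of the fix, pulled out into a self-contained sketch (the queue and token handling here are hypothetical stand-ins for the filter's real fields, not Solr code): the loop may only break once something has actually been queued, because the code after the loop reads the first queue element unconditionally; otherwise it must move on to the next input token.

    import java.util.ArrayDeque;
    import java.util.Deque;
    import java.util.Iterator;

    // Hypothetical model of the main-loop guard introduced above.
    class QueueGuardSketch {
        static String next(Iterator<String> input, Deque<String> queue,
                           boolean anyOutputEnabled) {
            while (queue.isEmpty()) {
                if (!input.hasNext()) return null;     // end of stream
                String tok = input.next();
                if (anyOutputEnabled) queue.add(tok);  // stands in for addCombos(...)
                // old code: break; -- left the loop even with an empty queue,
                // so the subsequent queue access threw ArrayIndexOutOfBounds.
                // new code: if (queue.size() > 0) break; else continue;
            }
            return queue.poll();
        }
    }

With anyOutputEnabled=false the sketch simply drains its input and returns null instead of failing, which is the behavior the new test further down relies on.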
@@ -1,284 +1,284 @@
/**
 * Copyright 2006 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.solr.analysis;

import junit.framework.TestCase;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;

/**
 * @author yonik
 * @version $Id$
 */
public class TestSynonymFilter extends TestCase {

  public List strings(String str) {
    String[] arr = str.split(" ");
    return Arrays.asList(arr);
  }

  /***
   * Return a list of tokens according to a test string format:
   * a b c    =>  returns List<Token> [a,b,c]
   * a/b      =>  tokens a and b share the same spot (b.positionIncrement=0)
   * a,3/b/c  =>  a,b,c all share same position (a.positionIncrement=3, b.positionIncrement=0, c.positionIncrement=0)
   */
  public List tokens(String str) {
    String[] arr = str.split(" ");
    List result = new ArrayList();
    for (int i=0; i<arr.length; i++) {
      String[] toks = arr[i].split("/");
      String[] params = toks[0].split(",");
      Token t = new Token(params[0],0,0,"TEST");
      if (params.length > 1) t.setPositionIncrement(Integer.parseInt(params[1]));
      result.add(t);
      for (int j=1; j<toks.length; j++) {
        t = new Token(toks[j],0,0,"TEST");
        t.setPositionIncrement(0);
        result.add(t);
      }
    }
    return result;
  }

  public List getTokList(SynonymMap dict, String input, boolean includeOrig) throws IOException {
    ArrayList lst = new ArrayList();
    final List toks = tokens(input);
    TokenStream ts = new TokenStream() {
      Iterator iter = toks.iterator();
      public Token next() throws IOException {
        return iter.hasNext() ? (Token)iter.next() : null;
      }
    };

    SynonymFilter sf = new SynonymFilter(ts, dict, true);

    while(true) {
      Token t = sf.next();
      if (t==null) return lst;
      lst.add(t);
    }
  }

  public List tok2str(List tokLst) {
    ArrayList lst = new ArrayList();
    for (Iterator iter = tokLst.iterator(); iter.hasNext();) {
      lst.add(((Token)(iter.next())).termText());
    }
    return lst;
  }


  public void assertTokEqual(List a, List b) {
    assertTokEq(a,b);
    assertTokEq(b,a);
  }

  private void assertTokEq(List a, List b) {
    int pos=0;
    for (Iterator iter = a.iterator(); iter.hasNext();) {
      Token tok = (Token)iter.next();
      pos += tok.getPositionIncrement();
      if (!tokAt(b, tok.termText(), pos)) {
        fail(a + "!=" + b);
      }
    }
  }

  public boolean tokAt(List lst, String val, int tokPos) {
    int pos=0;
    for (Iterator iter = lst.iterator(); iter.hasNext();) {
      Token tok = (Token)iter.next();
      pos += tok.getPositionIncrement();
      if (pos==tokPos && tok.termText().equals(val)) return true;
    }
    return false;
  }


  public void testMatching() throws IOException {
    SynonymMap map = new SynonymMap();

    boolean orig = false;
    boolean merge = true;
    map.add(strings("a b"), tokens("ab"), orig, merge);
    map.add(strings("a c"), tokens("ac"), orig, merge);
    map.add(strings("a"), tokens("aa"), orig, merge);
    map.add(strings("b"), tokens("bb"), orig, merge);
    map.add(strings("z x c v"), tokens("zxcv"), orig, merge);
    map.add(strings("x c"), tokens("xc"), orig, merge);

    // System.out.println(map);
    // System.out.println(getTokList(map,"a",false));

    assertTokEqual(getTokList(map,"$",false), tokens("$"));
    assertTokEqual(getTokList(map,"a",false), tokens("aa"));
    assertTokEqual(getTokList(map,"a $",false), tokens("aa $"));
    assertTokEqual(getTokList(map,"$ a",false), tokens("$ aa"));
    assertTokEqual(getTokList(map,"a a",false), tokens("aa aa"));
    assertTokEqual(getTokList(map,"b",false), tokens("bb"));
    assertTokEqual(getTokList(map,"z x c v",false), tokens("zxcv"));
    assertTokEqual(getTokList(map,"z x c $",false), tokens("z xc $"));

    // repeats
    map.add(strings("a b"), tokens("ab"), orig, merge);
    map.add(strings("a b"), tokens("ab"), orig, merge);
    assertTokEqual(getTokList(map,"a b",false), tokens("ab"));

    // check for lack of recursion
    map.add(strings("zoo"), tokens("zoo"), orig, merge);
    assertTokEqual(getTokList(map,"zoo zoo $ zoo",false), tokens("zoo zoo $ zoo"));
    map.add(strings("zoo"), tokens("zoo zoo"), orig, merge);
    assertTokEqual(getTokList(map,"zoo zoo $ zoo",false), tokens("zoo zoo zoo zoo $ zoo zoo"));
  }

  public void testIncludeOrig() throws IOException {
    SynonymMap map = new SynonymMap();

    boolean orig = true;
    boolean merge = true;
    map.add(strings("a b"), tokens("ab"), orig, merge);
    map.add(strings("a c"), tokens("ac"), orig, merge);
    map.add(strings("a"), tokens("aa"), orig, merge);
    map.add(strings("b"), tokens("bb"), orig, merge);
    map.add(strings("z x c v"), tokens("zxcv"), orig, merge);
    map.add(strings("x c"), tokens("xc"), orig, merge);

    // System.out.println(map);
    // System.out.println(getTokList(map,"a",false));

    assertTokEqual(getTokList(map,"$",false), tokens("$"));
    assertTokEqual(getTokList(map,"a",false), tokens("a/aa"));
    assertTokEqual(getTokList(map,"a",false), tokens("a/aa"));
    assertTokEqual(getTokList(map,"$ a",false), tokens("$ a/aa"));
    assertTokEqual(getTokList(map,"a $",false), tokens("a/aa $"));
    assertTokEqual(getTokList(map,"$ a !",false), tokens("$ a/aa !"));
    assertTokEqual(getTokList(map,"a a",false), tokens("a/aa a/aa"));
    assertTokEqual(getTokList(map,"b",false), tokens("b/bb"));
    assertTokEqual(getTokList(map,"z x c v",false), tokens("z/zxcv x c v"));
    assertTokEqual(getTokList(map,"z x c $",false), tokens("z x/xc c $"));

    // check for lack of recursion
    map.add(strings("zoo zoo"), tokens("zoo"), orig, merge);
    assertTokEqual(getTokList(map,"zoo zoo $ zoo",false), tokens("zoo/zoo zoo/zoo $ zoo/zoo"));
    map.add(strings("zoo"), tokens("zoo zoo"), orig, merge);
    assertTokEqual(getTokList(map,"zoo zoo $ zoo",false), tokens("zoo/zoo zoo $ zoo/zoo zoo"));
  }


  public void testMapMerge() throws IOException {
    SynonymMap map = new SynonymMap();

    boolean orig = false;
    boolean merge = true;
    map.add(strings("a"), tokens("a5,5"), orig, merge);
    map.add(strings("a"), tokens("a3,3"), orig, merge);
    // System.out.println(map);
    assertTokEqual(getTokList(map,"a",false), tokens("a3 a5,2"));

    map.add(strings("b"), tokens("b3,3"), orig, merge);
    map.add(strings("b"), tokens("b5,5"), orig, merge);
    //System.out.println(map);
    assertTokEqual(getTokList(map,"b",false), tokens("b3 b5,2"));


    map.add(strings("a"), tokens("A3,3"), orig, merge);
    map.add(strings("a"), tokens("A5,5"), orig, merge);
    assertTokEqual(getTokList(map,"a",false), tokens("a3/A3 a5,2/A5"));

    map.add(strings("a"), tokens("a1"), orig, merge);
    assertTokEqual(getTokList(map,"a",false), tokens("a1 a3,2/A3 a5,2/A5"));

    map.add(strings("a"), tokens("a2,2"), orig, merge);
    map.add(strings("a"), tokens("a4,4 a6,2"), orig, merge);
    assertTokEqual(getTokList(map,"a",false), tokens("a1 a2 a3/A3 a4 a5/A5 a6"));
  }


  public void testOverlap() throws IOException {
    SynonymMap map = new SynonymMap();

    boolean orig = false;
    boolean merge = true;
    map.add(strings("qwe"), tokens("qq/ww/ee"), orig, merge);
    map.add(strings("qwe"), tokens("xx"), orig, merge);
    map.add(strings("qwe"), tokens("yy"), orig, merge);
    map.add(strings("qwe"), tokens("zz"), orig, merge);
    assertTokEqual(getTokList(map,"$",false), tokens("$"));
    assertTokEqual(getTokList(map,"qwe",false), tokens("qq/ww/ee/xx/yy/zz"));

    // test merging within the map

    map.add(strings("a"), tokens("a5,5 a8,3 a10,2"), orig, merge);
    map.add(strings("a"), tokens("a3,3 a7,4 a9,2 a11,2 a111,100"), orig, merge);
    assertTokEqual(getTokList(map,"a",false), tokens("a3 a5,2 a7,2 a8 a9 a10 a11 a111,100"));
  }

  public void testOffsets() throws IOException {
    SynonymMap map = new SynonymMap();

    boolean orig = false;
    boolean merge = true;

    // test that generated tokens start at the same offset as the original
    map.add(strings("a"), tokens("aa"), orig, merge);
    assertTokEqual(getTokList(map,"a,5",false), tokens("aa,5"));
    assertTokEqual(getTokList(map,"a,0",false), tokens("aa,0"));

    // test that offset of first replacement is ignored (always takes the orig offset)
    map.add(strings("b"), tokens("bb,100"), orig, merge);
    assertTokEqual(getTokList(map,"b,5",false), tokens("bb,5"));
    assertTokEqual(getTokList(map,"b,0",false), tokens("bb,0"));

    // test that subsequent tokens are adjusted accordingly
    map.add(strings("c"), tokens("cc,100 c2,2"), orig, merge);
    assertTokEqual(getTokList(map,"c,5",false), tokens("cc,5 c2,2"));
    assertTokEqual(getTokList(map,"c,0",false), tokens("cc,0 c2,2"));

  }


  public void testOffsetsWithOrig() throws IOException {
    SynonymMap map = new SynonymMap();

    boolean orig = true;
    boolean merge = true;

    // test that generated tokens start at the same offset as the original
    map.add(strings("a"), tokens("aa"), orig, merge);
    assertTokEqual(getTokList(map,"a,5",false), tokens("a,5/aa"));
    assertTokEqual(getTokList(map,"a,0",false), tokens("a,0/aa"));

    // test that offset of first replacement is ignored (always takes the orig offset)
    map.add(strings("b"), tokens("bb,100"), orig, merge);
    assertTokEqual(getTokList(map,"b,5",false), tokens("bb,5/b"));
    assertTokEqual(getTokList(map,"b,0",false), tokens("bb,0/b"));

    // test that subsequent tokens are adjusted accordingly
    map.add(strings("c"), tokens("cc,100 c2,2"), orig, merge);
    assertTokEqual(getTokList(map,"c,5",false), tokens("cc,5/c c2,2"));
    assertTokEqual(getTokList(map,"c,0",false), tokens("cc,0/c c2,2"));
  }


}
@@ -1,49 +1,53 @@
 package org.apache.solr.analysis;
 
 import org.apache.solr.util.AbstractSolrTestCase;
 import org.apache.solr.util.TestHarness;
 import org.apache.solr.request.SolrQueryRequest;
 
 /**
  * New WordDelimiterFilter tests... most of the tests are in ConvertedLegacyTest
  */
 public class TestWordDelimiterFilter extends AbstractSolrTestCase {
   public String getSchemaFile() { return "solr/conf/schema.xml"; }
   public String getSolrConfigFile() { return "solr/conf/solrconfig.xml"; }
 
 
   public void posTst(String v1, String v2, String s1, String s2) {
     assertU(adoc("id", "42",
                  "subword", v1,
                  "subword", v2));
     assertU(commit());
 
     // there is a positionIncrementGap of 100 between field values, so
     // we test if that was maintained.
     assertQ("position increment lost",
             req("+id:42 +subword:\"" + s1 + ' ' + s2 + "\"~90")
             ,"//result[@numFound=0]"
     );
     assertQ("position increment lost",
             req("+id:42 +subword:\"" + s1 + ' ' + s2 + "\"~110")
             ,"//result[@numFound=1]"
     );
   }
 
 
   public void testRetainPositionIncrement() {
     posTst("foo","bar","foo","bar");
     posTst("-foo-","-bar-","foo","bar");
     posTst("foo","bar","-foo-","-bar-");
 
     posTst("123","456","123","456");
     posTst("/123/","/456/","123","456");
 
     posTst("/123/abc","qwe/456/","abc","qwe");
 
     posTst("zoo-foo","bar-baz","foo","bar");
     posTst("zoo-foo-123","456-bar-baz","foo","bar");
   }
 
+  public void testNoGenerationEdgeCase() {
+    assertU(adoc("id", "222", "numberpartfail", "123.123.123.123"));
+  }
+
 
 }
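A worked example of the arithmetic behind posTst's two slop values (illustrative numbers; the exact boundary depends on how the gap is applied, which is presumably why the test brackets it with 90 and 110 rather than pinning it):

    public class SlopMathSketch {
        public static void main(String[] args) {
            int gap = 100;                 // positionIncrementGap between field values
            int fooPos = 0;                // "foo": first token of the first value
            int barPos = fooPos + gap + 1; // "bar": first token of the second value
            // a phrase query expects "bar" right after "foo", at fooPos + 1
            int requiredSlop = barPos - (fooPos + 1);
            System.out.println(requiredSlop); // ~100: slop 90 misses, slop 110 matches
        }
    }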
@@ -52,6 +52,15 @@
     <fieldtype name="bcdlong" class="solr.BCDLongField" sortMissingLast="true"/>
     <fieldtype name="bcdstr" class="solr.BCDStrField" sortMissingLast="true"/>
 
+    <!-- Field type demonstrating an Analyzer failure -->
+    <fieldtype name="failtype1" class="solr.TextField">
+      <analyzer type="index">
+          <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+          <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0" catenateNumbers="0" catenateAll="0"/>
+          <filter class="solr.LowerCaseFilterFactory"/>
+      </analyzer>
+    </fieldtype>
+
 
     <fieldtype name="boolean" class="solr.BoolField" sortMissingLast="true"/>
     <fieldtype name="string" class="solr.StrField" sortMissingLast="true"/>
@@ -311,6 +320,8 @@
    <field name="lengthfilt" type="lengthfilt" indexed="true" stored="true"/>
    <field name="dedup" type="dedup" indexed="true" stored="true"/>
 
+   <field name="numberpartfail" type="failtype1" indexed="true" stored="true"/>
+
    <field name="nullfirst" type="string" indexed="true" stored="true" sortMissingFirst="true"/>
 
    <field name="subword" type="subword" indexed="true" stored="true"/>